read.scm 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897
  1. ;;; Scheme reader
  2. ;;; Copyright (C) 1995-1997,1999-2001,2003-2004,2006-2012,2014-2021
  3. ;;; Free Software Foundation, Inc.
  4. ;;;
  5. ;;; This library is free software: you can redistribute it and/or modify
  6. ;;; it under the terms of the GNU Lesser General Public License as
  7. ;;; published by the Free Software Foundation, either version 3 of the
  8. ;;; License, or (at your option) any later version.
  9. ;;;
  10. ;;; This library is distributed in the hope that it will be useful, but
  11. ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. ;;; Lesser General Public License for more details.
  14. ;;;
  15. ;;; You should have received a copy of the GNU Lesser General Public
  16. ;;; License along with this program. If not, see
  17. ;;; <http://www.gnu.org/licenses/>.
  18. ;;; Commentary:
  19. ;;;
  20. ;;; Implementation of Scheme's "read".
  21. ;;;
  22. ;;; Code:
  23. ;; While porting read.c to Scheme, I found these expressions that result
  24. ;; in undesirable behavior in the C reader. Most all of them are also
  25. ;; present in the Scheme reader. Probably I should fix all of them, but
  26. ;; I would first like to prove that the Scheme reader is good enough.
  27. ;;
  28. ;; (call-with-input-string "," read)
  29. ;; (read-disable 'square-brackets), then (call-with-input-string "]" read)
  30. ;; (call-with-input-string "(#tru1)" read) => '(#t ru1)
  31. ;; (call-with-input-string "(#true1)" read) => '(#t 1)
  32. ;; (call-with-input-string "(#fAlse)" read) => '(#f Alse)
  33. ;; (call-with-input-string "(#f1 #f2 #f3)" read) => error reading array
  34. ;; #: foo
  35. ;; #:#|what|#foo
  36. ;; #@-(1 2 3) => #(1 2 3)
  37. ;; (#*10101010102) => (#*1010101010 2)
  38. (define-syntax let*-values
  39. (syntax-rules ()
  40. ((_ () . body) (let () . body))
  41. ((_ ((vars expr) . binds) . body)
  42. (call-with-values (lambda () expr)
  43. (lambda vars (let*-values binds . body))))))
  44. (define bitfield:record-positions? 0)
  45. (define bitfield:case-insensitive? 2)
  46. (define bitfield:keyword-style 4)
  47. (define bitfield:r6rs-escapes? 6)
  48. (define bitfield:square-brackets? 8)
  49. (define bitfield:hungry-eol-escapes? 10)
  50. (define bitfield:curly-infix? 12)
  51. (define bitfield:r7rs-symbols? 14)
  52. (define read-option-bits 16)
  53. (define read-option-mask #b11)
  54. (define read-option-inherit #b11)
  55. (define read-options-inherit-all (1- (ash 1 read-option-bits)))
  56. (define keyword-style-hash-prefix 0)
  57. (define keyword-style-prefix 1)
  58. (define keyword-style-postfix 2)
  59. (define (compute-reader-options port)
  60. (let ((options (read-options))
  61. (port-options (or (%port-property port 'port-read-options)
  62. read-options-inherit-all)))
  63. (define-syntax-rule (option field exp)
  64. (let ((port-option (logand port-options (ash read-option-mask field))))
  65. (if (= port-option (ash read-option-inherit field))
  66. exp
  67. port-option)))
  68. (define (bool key field)
  69. (option field
  70. (if (memq key options) (ash 1 field) 0)))
  71. (define (enum key values field)
  72. (option field
  73. (ash (assq-ref values (and=> (memq key options) cadr)) field)))
  74. (logior (bool 'positions bitfield:record-positions?)
  75. (bool 'case-insensitive bitfield:case-insensitive?)
  76. (enum 'keywords '((#f . 0) (prefix . 1) (postfix . 2))
  77. bitfield:keyword-style)
  78. (bool 'r6rs-hex-escapes bitfield:r6rs-escapes?)
  79. (bool 'square-brackets bitfield:square-brackets?)
  80. (bool 'hungry-eol-escapes bitfield:hungry-eol-escapes?)
  81. (bool 'curly-infix bitfield:curly-infix?)
  82. (bool 'r7rs-symbols bitfield:r7rs-symbols?))))
  83. (define (set-option options field new)
  84. (logior (ash new field) (logand options (lognot (ash #b11 field)))))
  85. (define (set-port-read-option! port field value)
  86. (%set-port-property! port 'port-read-options
  87. (set-option (or (%port-property port 'port-read-options)
  88. read-options-inherit-all)
  89. field value)))
  90. (define (%read port annotate strip-annotation)
  91. ;; init read options
  92. (define opts (compute-reader-options port))
  93. (define (enabled? field)
  94. (not (zero? (logand (ash 1 field) opts))))
  95. (define (set-reader-option! field value)
  96. (set! opts (set-option opts field value))
  97. (set-port-read-option! port field value))
  98. (define (case-insensitive?) (enabled? bitfield:case-insensitive?))
  99. (define (keyword-style) (logand read-option-mask
  100. (ash opts (- bitfield:keyword-style))))
  101. (define (r6rs-escapes?) (enabled? bitfield:r6rs-escapes?))
  102. (define (square-brackets?) (enabled? bitfield:square-brackets?))
  103. (define (hungry-eol-escapes?) (enabled? bitfield:hungry-eol-escapes?))
  104. (define (curly-infix?) (enabled? bitfield:curly-infix?))
  105. (define (r7rs-symbols?) (enabled? bitfield:r7rs-symbols?))
  106. (define neoteric 0)
  107. (define (next) (read-char port))
  108. (define (peek) (peek-char port))
  109. (define filename (port-filename port))
  110. (define (get-pos) (cons (port-line port) (port-column port)))
  111. ;; We are only ever interested in whether an object is a char or not.
  112. (define (eof-object? x) (not (char? x)))
  113. (define (input-error msg args)
  114. (scm-error 'read-error #f
  115. (format #f "~A:~S:~S: ~A"
  116. (or filename "#<unknown port>")
  117. (1+ (port-line port))
  118. (1+ (port-column port))
  119. msg)
  120. args #f))
  121. (define-syntax-rule (error msg arg ...)
  122. (let ((args (list arg ...)))
  123. (input-error msg args)))
  124. (define (read-semicolon-comment)
  125. (let ((ch (next)))
  126. (cond
  127. ((eof-object? ch) ch)
  128. ((eqv? ch #\newline) (next))
  129. (else (read-semicolon-comment)))))
  130. (define-syntax-rule (take-until first pred)
  131. (let lp ((out (list first)))
  132. (let ((ch (peek)))
  133. (if (or (eof-object? ch) (pred ch))
  134. (reverse-list->string out)
  135. (begin
  136. (next)
  137. (lp (cons ch out)))))))
  138. (define-syntax-rule (take-while first pred)
  139. (take-until first (lambda (ch) (not (pred ch)))))
  140. (define (delimiter? ch)
  141. (case ch
  142. ((#\( #\) #\; #\" #\space #\return #\ff #\newline #\tab) #t)
  143. ((#\[ #\]) (or (square-brackets?) (curly-infix?)))
  144. ((#\{ #\}) (curly-infix?))
  145. (else #f)))
  146. (define (read-token ch)
  147. (take-until ch delimiter?))
  148. (define (read-mixed-case-symbol ch)
  149. (let* ((str (read-token ch))
  150. (len (string-length str)))
  151. (cond
  152. ((and (eq? (keyword-style) keyword-style-postfix)
  153. (> len 1) (eqv? #\: (string-ref str (1- len))))
  154. (let ((str (substring str 0 (1- len))))
  155. (symbol->keyword
  156. (string->symbol
  157. (if (case-insensitive?)
  158. (string-downcase str)
  159. str)))))
  160. (else
  161. (string->symbol
  162. (if (case-insensitive?)
  163. (string-downcase str)
  164. str))))))
  165. (define (read-parenthesized rdelim)
  166. (define (finish-curly-infix ret)
  167. ;; Perform syntactic transformations on {...} lists.
  168. (define (extract-infix-list ls)
  169. (and (pair? ls)
  170. (let ((x (car ls))
  171. (ls (cdr ls)))
  172. (and (pair? ls)
  173. (let ((op (car ls))
  174. (ls (cdr ls)))
  175. (if (and (pair? ls) (null? (cdr ls)))
  176. (cons* op x ls)
  177. (let ((tail (extract-infix-list ls)))
  178. (and tail
  179. (equal? (strip-annotation op)
  180. (strip-annotation (car tail)))
  181. (cons* op x (cdr tail))))))))))
  182. (cond
  183. ((not (eqv? rdelim #\})) ret) ; Only on {...} lists.
  184. ((not (pair? ret)) ret) ; {} => (); {.x} => x
  185. ((null? (cdr ret)) (car ret)); {x} => x
  186. ((and (pair? (cdr ret)) (null? (cddr ret))) ret) ; {x y} => (x y)
  187. ((extract-infix-list ret)) ; {x + y + ... + z} => (+ x y ... z)
  188. (else (cons '$nfx$ ret)))) ; {x y . z} => ($nfx$ x y . z)
  189. (define curly? (eqv? rdelim #\}))
  190. (finish-curly-infix
  191. (let lp ((ch (next-non-whitespace)))
  192. (when (eof-object? ch)
  193. (error "unexpected end of input while searching for: ~A"
  194. rdelim))
  195. (cond
  196. ((eqv? ch rdelim) '())
  197. ((or (eqv? ch #\))
  198. (and (eqv? ch #\]) (or (square-brackets?) (curly-infix?)))
  199. (and (eqv? ch #\}) (curly-infix?)))
  200. (error "mismatched close paren: ~A" ch))
  201. (else
  202. (let ((expr (read-expr ch)))
  203. ;; Note that it is possible for scm_read_expression to
  204. ;; return `.', but not as part of a dotted pair: as in
  205. ;; #{.}#. Indeed an example is here!
  206. (if (and (eqv? ch #\.) (eq? (strip-annotation expr) '#{.}#))
  207. (let* ((tail (read-subexpression "tail of improper list"))
  208. (close (next-non-whitespace)))
  209. (unless (eqv? close rdelim)
  210. (error "missing close paren: ~A" close))
  211. tail)
  212. (cons expr (lp (next-non-whitespace))))))))))
  213. (define (hex-digit ch)
  214. (case ch
  215. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
  216. (- (char->integer ch) (char->integer #\0)))
  217. ((#\a #\b #\c #\d #\e #\f)
  218. (+ 10 (- (char->integer ch) (char->integer #\a))))
  219. ((#\A #\B #\C #\D #\E #\F)
  220. (+ 10 (- (char->integer ch) (char->integer #\A))))
  221. (else #f)))
  222. (define (read-r6rs-hex-escape)
  223. (let ((ch (next)))
  224. (cond
  225. ((hex-digit ch) =>
  226. (lambda (res)
  227. (let lp ((res res))
  228. (let ((ch (next)))
  229. (cond
  230. ((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
  231. ((eqv? ch #\;) (integer->char res))
  232. ((eof-object? ch)
  233. (error "unexpected end of input in character escape sequence"))
  234. (else
  235. (error "invalid character in escape sequence: ~S" ch)))))))
  236. ((eof-object? ch)
  237. (error "unexpected end of input in character escape sequence"))
  238. (else
  239. (error "invalid character in escape sequence: ~S" ch)))))
  240. (define (read-fixed-hex-escape len)
  241. (let lp ((len len) (res 0))
  242. (if (zero? len)
  243. (integer->char res)
  244. (let ((ch (next)))
  245. (cond
  246. ((hex-digit ch) =>
  247. (lambda (digit)
  248. (lp (1- len) (+ (* res 16) digit))))
  249. ((eof-object? ch)
  250. (error "unexpected end of input in character escape sequence"))
  251. (else
  252. (error "invalid character in escape sequence: ~S" ch)))))))
  253. (define (read-string rdelim)
  254. (let lp ((out '()))
  255. (let ((ch (next)))
  256. (cond
  257. ((eof-object? ch)
  258. (error "unexpected end of input while reading string"))
  259. ((eqv? ch rdelim)
  260. (reverse-list->string out))
  261. ((eqv? ch #\\)
  262. (let ((ch (next)))
  263. (when (eof-object? ch)
  264. (error "unexpected end of input while reading string"))
  265. (cond
  266. ((eqv? ch #\newline)
  267. (when (hungry-eol-escapes?)
  268. ;; Skip intraline whitespace before continuing.
  269. (let skip ()
  270. (let ((ch (peek)))
  271. (when (and (not (eof-object? ch))
  272. (or (eqv? ch #\tab)
  273. (eq? (char-general-category ch) 'Zs)))
  274. (next)
  275. (skip)))))
  276. (lp out))
  277. ((eqv? ch rdelim)
  278. (lp (cons rdelim out)))
  279. (else
  280. (lp
  281. (cons
  282. (case ch
  283. ;; Accept "\(" for use at the beginning of
  284. ;; lines in multiline strings to avoid
  285. ;; confusing emacs lisp modes.
  286. ((#\| #\\ #\() ch)
  287. ((#\0) #\nul)
  288. ((#\f) #\ff)
  289. ((#\n) #\newline)
  290. ((#\r) #\return)
  291. ((#\t) #\tab)
  292. ((#\a) #\alarm)
  293. ((#\v) #\vtab)
  294. ((#\b) #\backspace)
  295. ((#\x)
  296. (if (or (r6rs-escapes?) (eqv? rdelim #\|))
  297. (read-r6rs-hex-escape)
  298. (read-fixed-hex-escape 2)))
  299. ((#\u)
  300. (read-fixed-hex-escape 4))
  301. ((#\U)
  302. (read-fixed-hex-escape 6))
  303. (else
  304. (error "invalid character in escape sequence: ~S" ch)))
  305. out))))))
  306. (else
  307. (lp (cons ch out)))))))
  308. (define (read-character)
  309. (let ((ch (next)))
  310. (cond
  311. ((eof-object? ch)
  312. (error "unexpected end of input after #\\"))
  313. ((delimiter? ch)
  314. ch)
  315. (else
  316. (let* ((tok (read-token ch))
  317. (len (string-length tok)))
  318. (define dotted-circle #\x25cc)
  319. (define r5rs-charnames
  320. '(("space" . #\x20) ("newline" . #\x0a)))
  321. (define r6rs-charnames
  322. '(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
  323. ("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
  324. ("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
  325. ("delete" . #\x7f)))
  326. (define r7rs-charnames
  327. '(("escape" . #\x1b)))
  328. (define C0-control-charnames
  329. '(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
  330. ("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
  331. ("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08)
  332. ("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b)
  333. ("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e)
  334. ("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
  335. ("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
  336. ("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
  337. ("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a)
  338. ("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d)
  339. ("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20)
  340. ("del" . #\x7f)))
  341. (define alt-charnames
  342. '(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
  343. ;; Although R6RS and R7RS charnames specified as being
  344. ;; case-sensitive, Guile matches them case-insensitively, like
  345. ;; other char names.
  346. (define (named-char tok alist)
  347. (let lp ((alist alist))
  348. (and (pair? alist)
  349. (if (string-ci=? tok (caar alist))
  350. (cdar alist)
  351. (lp (cdr alist))))))
  352. (cond
  353. ((= len 1) ch)
  354. ((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
  355. ;; Ignore dotted circles, which may be used to keep
  356. ;; combining characters from combining with the backslash in
  357. ;; #\charname.
  358. ch)
  359. ((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7))
  360. (string->number tok 8))
  361. ;; Specifying a codepoint as an octal value.
  362. => integer->char)
  363. ((and (eqv? ch #\x) (> len 1)
  364. (string->number (substring tok 1) 16))
  365. ;; Specifying a codepoint as an hexadecimal value. Skip
  366. ;; initial "x".
  367. => integer->char)
  368. ((named-char tok r5rs-charnames))
  369. ((named-char tok r6rs-charnames))
  370. ((named-char tok r7rs-charnames))
  371. ((named-char tok C0-control-charnames))
  372. ((named-char tok alt-charnames))
  373. (else
  374. (error "unknown character name ~a" tok))))))))
  375. (define (read-vector)
  376. (list->vector (map strip-annotation (read-parenthesized #\)))))
  377. (define (read-srfi-4-vector ch)
  378. (read-array ch))
  379. (define (maybe-read-boolean-tail tail)
  380. (let ((len (string-length tail)))
  381. (let lp ((i 0))
  382. (or (= i len)
  383. (let ((ch (peek)))
  384. (and (not (eof-object? ch))
  385. (eqv? (char-downcase ch) (string-ref tail i))
  386. (or (begin
  387. (next)
  388. (lp (1+ i)))
  389. (begin
  390. (unread-char ch port)
  391. #f))))))))
  392. (define (read-false-or-srfi-4-vector)
  393. (let ((ch (peek)))
  394. (if (or (eqv? ch #\3)
  395. (eqv? ch #\6))
  396. (read-srfi-4-vector #\f)
  397. (begin
  398. (maybe-read-boolean-tail "alse")
  399. #f))))
  400. (define (read-bytevector)
  401. (define (expect ch)
  402. (unless (eqv? (next) ch)
  403. (error "invalid bytevector prefix" ch)))
  404. (expect #\u)
  405. (expect #\8)
  406. (expect #\()
  407. (list->typed-array 'vu8 1
  408. (map strip-annotation (read-parenthesized #\)))))
  409. ;; FIXME: We should require a terminating delimiter.
  410. (define (read-bitvector)
  411. (list->bitvector
  412. (let lp ()
  413. (let ((ch (peek)))
  414. (case ch
  415. ((#\0) (next) (cons #f (lp)))
  416. ((#\1) (next) (cons #t (lp)))
  417. (else '()))))))
  418. (define (read-boolean ch)
  419. ;; Historically, Guile hasn't required a delimiter after #f / #t.
  420. ;; When the longer #false / #true forms were added, we kept this
  421. ;; behavior. It is terrible and we should change it!!
  422. (case ch
  423. ((#\t #\T)
  424. (maybe-read-boolean-tail "rue")
  425. #t)
  426. (else
  427. (maybe-read-boolean-tail "alse")
  428. #f)))
  429. (define (read-keyword)
  430. (let ((expr (strip-annotation (read-subexpression "keyword"))))
  431. (unless (symbol? expr)
  432. (error "keyword prefix #: not followed by a symbol: ~a" expr))
  433. (symbol->keyword expr)))
  434. (define (read-array ch)
  435. (define (read-decimal-integer ch alt)
  436. ;; This parser has problems but it's what Guile's read.c does. Any
  437. ;; fix should come later and to both of them.
  438. (define (decimal-digit ch)
  439. (and (not (eof-object? ch))
  440. (let ((digit (- (char->integer ch) (char->integer #\0))))
  441. (and (<= 0 digit 9) digit))))
  442. (let*-values (((sign ch) (if (eqv? ch #\-)
  443. (values -1 (next))
  444. (values 1 ch))))
  445. (let lp ((ch ch) (res #f))
  446. (cond
  447. ((decimal-digit ch)
  448. => (lambda (digit)
  449. (lp (next) (if res (+ (* 10 res) digit) digit))))
  450. (else
  451. (values ch (if res (* res sign) alt)))))))
  452. (define (read-rank ch)
  453. (let*-values (((ch rank) (read-decimal-integer ch 1)))
  454. (when (< rank 0)
  455. (error "array rank must be non-negative"))
  456. (when (eof-object? ch)
  457. (error "unexpected end of input while reading array"))
  458. (values ch rank)))
  459. (define (read-tag ch)
  460. (let lp ((ch ch) (chars '()))
  461. (when (eof-object? ch)
  462. (error "unexpected end of input while reading array"))
  463. (if (memv ch '(#\( #\@ #\:))
  464. (values ch
  465. (if (null? chars)
  466. #t
  467. (string->symbol (list->string (reverse chars)))))
  468. (lp (next) (cons ch chars)))))
  469. (define (read-dimension ch)
  470. (let*-values (((ch lbnd) (if (eqv? ch #\@)
  471. (read-decimal-integer (next) 0)
  472. (values ch 0)))
  473. ((ch len) (if (eqv? ch #\:)
  474. (read-decimal-integer (next) 0)
  475. (values ch #f))))
  476. (when (and len (< len 0))
  477. (error "array length must be non-negative"))
  478. (when (eof-object? ch)
  479. (error "unexpected end of input while reading array"))
  480. (values ch
  481. (if len
  482. (list lbnd (+ lbnd (1- len)))
  483. lbnd))))
  484. (define (read-shape ch alt)
  485. (if (memv ch '(#\@ #\:))
  486. (let*-values (((ch head) (read-dimension ch))
  487. ((ch tail) (read-shape ch '())))
  488. (values ch (cons head tail)))
  489. (values ch alt)))
  490. (define (read-elements ch rank)
  491. (unless (eqv? ch #\()
  492. (error "missing '(' in vector or array literal"))
  493. (let ((elts (map strip-annotation (read-parenthesized #\)))))
  494. (if (zero? rank)
  495. (begin
  496. ;; Handle special print syntax of rank zero arrays; see
  497. ;; scm_i_print_array for a rationale.
  498. (when (null? elts)
  499. (error "too few elements in array literal, need 1"))
  500. (unless (null? (cdr elts))
  501. (error "too many elements in array literal, need 1"))
  502. (car elts))
  503. elts)))
  504. (let*-values (((ch rank) (read-rank ch))
  505. ((ch tag) (read-tag ch))
  506. ((ch shape) (read-shape ch rank))
  507. ((elts) (read-elements ch rank)))
  508. (when (and (pair? shape) (not (eqv? (length shape) rank)))
  509. (error "the number of shape specifications must match the array rank"))
  510. (list->typed-array tag shape elts)))
  511. (define (read-number-and-radix ch)
  512. (let ((tok (string-append "#" (read-token ch))))
  513. (or (string->number tok)
  514. (error "unknown # object: ~S" tok))))
  515. (define (read-extended-symbol)
  516. (define (next-not-eof)
  517. (let ((ch (next)))
  518. (when (eof-object? ch)
  519. (error "end of input while reading symbol"))
  520. ch))
  521. (string->symbol
  522. (list->string
  523. (let lp ((saw-brace? #f))
  524. (let lp/inner ((ch (next-not-eof))
  525. (saw-brace? saw-brace?))
  526. (cond
  527. (saw-brace?
  528. (if (eqv? ch #\#)
  529. '()
  530. ;; Don't eat CH, see
  531. ;; <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=49623>.
  532. (cons #\} (lp/inner ch #f))))
  533. ((eqv? ch #\})
  534. (lp #t))
  535. ((eqv? ch #\\)
  536. ;; It used to be that print.c would print extended-read-syntax
  537. ;; symbols with backslashes before "non-standard" chars, but
  538. ;; this routine wouldn't do anything with those escapes.
  539. ;; Bummer. What we've done is to change print.c to output
  540. ;; R6RS hex escapes for those characters, relying on the fact
  541. ;; that the extended read syntax would never put a `\' before
  542. ;; an `x'. For now, we just ignore other instances of
  543. ;; backslash in the string.
  544. (let* ((ch (next-not-eof))
  545. (ch (if (eqv? ch #\x)
  546. (read-r6rs-hex-escape)
  547. ch)))
  548. (cons ch (lp #f))))
  549. (else
  550. (cons ch (lp #f)))))))))
  551. (define (read-nil)
  552. ;; Have already read "#\n" -- now read "il".
  553. (let ((id (read-mixed-case-symbol #\n)))
  554. (unless (eq? id 'nil)
  555. (error "unexpected input while reading #nil: ~a" id))
  556. #nil))
  557. (define (read-sharp)
  558. (let* ((ch (next)))
  559. (cond
  560. ((eof-object? ch)
  561. (error "unexpected end of input after #"))
  562. ((read-hash-procedure ch)
  563. => (lambda (proc) (proc ch port)))
  564. (else
  565. (case ch
  566. ((#\\) (read-character))
  567. ((#\() (read-vector))
  568. ((#\s #\u #\c) (read-srfi-4-vector ch))
  569. ((#\f) (read-false-or-srfi-4-vector))
  570. ((#\v) (read-bytevector))
  571. ((#\*) (read-bitvector))
  572. ((#\t #\T #\F) (read-boolean ch))
  573. ((#\:) (read-keyword))
  574. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\@)
  575. (read-array ch))
  576. ((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
  577. (read-number-and-radix ch))
  578. ((#\{) (read-extended-symbol))
  579. ((#\') (list 'syntax (read-subexpression "syntax expression")))
  580. ((#\`) (list 'quasisyntax
  581. (read-subexpression "quasisyntax expression")))
  582. ((#\,)
  583. (if (eqv? #\@ (peek))
  584. (begin
  585. (next)
  586. (list 'unsyntax-splicing
  587. (read-subexpression "unsyntax-splicing expression")))
  588. (list 'unsyntax (read-subexpression "unsyntax expression"))))
  589. ((#\n) (read-nil))
  590. (else
  591. (error "Unknown # object: ~S" (string #\# ch))))))))
  592. (define (read-number ch)
  593. (let* ((str (read-token ch)))
  594. (or (string->number str)
  595. (string->symbol (if (case-insensitive?)
  596. (string-downcase str)
  597. str)))))
  598. (define (read-expr* ch)
  599. (case ch
  600. ((#\{)
  601. (cond
  602. ((curly-infix?)
  603. (set! neoteric (1+ neoteric))
  604. (let ((expr (read-parenthesized #\})))
  605. (set! neoteric (1- neoteric))
  606. expr))
  607. (else
  608. (read-mixed-case-symbol ch))))
  609. ((#\[)
  610. (cond
  611. ((square-brackets?)
  612. (read-parenthesized #\]))
  613. ((curly-infix?)
  614. ;; The syntax of neoteric expressions requires that '[' be a
  615. ;; delimiter when curly-infix is enabled, so it cannot be part
  616. ;; of an unescaped symbol. We might as well do something
  617. ;; useful with it, so we adopt Kawa's convention: [...] =>
  618. ;; ($bracket-list$ ...)
  619. ;; FIXME: source locations for this cons
  620. (cons '$bracket-list$ (read-parenthesized #\])))
  621. (else
  622. (read-mixed-case-symbol ch))))
  623. ((#\()
  624. (read-parenthesized #\)))
  625. ((#\")
  626. (read-string ch))
  627. ((#\|)
  628. (if (r7rs-symbols?)
  629. (string->symbol (read-string ch))
  630. (read-mixed-case-symbol ch)))
  631. ((#\')
  632. (list 'quote (read-subexpression "quoted expression")))
  633. ((#\`)
  634. (list 'quasiquote (read-subexpression "quasiquoted expression")))
  635. ((#\,)
  636. (cond
  637. ((eqv? #\@ (peek))
  638. (next)
  639. (list 'unquote-splicing (read-subexpression "subexpression of ,@")))
  640. (else
  641. (list 'unquote (read-subexpression "unquoted expression")))))
  642. ((#\#)
  643. ;; FIXME: read-sharp should recur if we read a comment
  644. (read-sharp))
  645. ((#\))
  646. (error "unexpected \")\""))
  647. ((#\})
  648. (if (curly-infix?)
  649. (error "unexpected \"}\"")
  650. (read-mixed-case-symbol ch)))
  651. ((#\])
  652. (if (square-brackets?)
  653. (error "unexpected \"]\"")
  654. (read-mixed-case-symbol ch)))
  655. ((#\:)
  656. (if (eq? (keyword-style) keyword-style-prefix)
  657. ;; FIXME: Don't skip whitespace here.
  658. (let ((sym (read-subexpression ":keyword")))
  659. (symbol->keyword (strip-annotation sym)))
  660. (read-mixed-case-symbol ch)))
  661. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.)
  662. (read-number ch))
  663. (else
  664. (read-mixed-case-symbol ch))))
  665. (define (read-neoteric ch)
  666. (let lp ((expr (read-expr* ch)))
  667. ;; 'expr' is the first component of the neoteric expression. If
  668. ;; the next character is '(', '[', or '{', (without any
  669. ;; intervening whitespace), we use it to construct a new
  670. ;; expression, and loop. For example:
  671. ;; f{n - 1}(x) => ((f (- n 1)) x).
  672. (case (peek)
  673. ((#\() ;; e(...) => (e ...)
  674. (next)
  675. (lp (cons expr (read-parenthesized #\)))))
  676. ((#\[) ;; e[...] => ($bracket-apply$ e ...)
  677. (next)
  678. (lp (cons* '$bracket-apply$ expr (read-parenthesized #\]))))
  679. ((#\{) ;; e{} => (e); e{...} => (e {...})
  680. (next)
  681. (let ((args (read-parenthesized #\})))
  682. (lp (if (null? args)
  683. (list expr)
  684. (list expr args)))))
  685. (else
  686. expr))))
  687. (define (read-expr ch)
  688. (let ((line (port-line port))
  689. (column (port-column port)))
  690. (annotate line
  691. column
  692. (if (zero? neoteric)
  693. (read-expr* ch)
  694. (read-neoteric ch)))))
  695. (define (read-directive)
  696. (define (directive-char? ch)
  697. (and (char? ch)
  698. (or (eqv? ch #\-)
  699. (char-alphabetic? ch)
  700. (char-numeric? ch))))
  701. (let ((ch (peek)))
  702. (cond
  703. ((directive-char? ch)
  704. (next)
  705. (string->symbol (take-while ch directive-char?)))
  706. (else
  707. #f))))
  708. (define (skip-scsh-comment)
  709. (let lp ((ch (next)))
  710. (cond
  711. ((eof-object? ch)
  712. (error "unterminated `#! ... !#' comment"))
  713. ((eqv? ch #\!)
  714. (let ((ch (next)))
  715. (if (eqv? ch #\#)
  716. (next)
  717. (lp ch))))
  718. (else
  719. (lp (next))))))
  720. (define (process-shebang)
  721. ;; After having read #!, we complete either with #!r6rs,
  722. ;; #!fold-case, #!no-fold-case, #!curly-infix,
  723. ;; #!curly-infix-and-bracket-lists, or a SCSH block comment
  724. ;; terminated by !#.
  725. (let ((sym (read-directive)))
  726. (cond
  727. ((eq? sym 'r6rs)
  728. (set-reader-option! bitfield:case-insensitive? 0)
  729. (set-reader-option! bitfield:r6rs-escapes? 1)
  730. (set-reader-option! bitfield:square-brackets? 1)
  731. (set-reader-option! bitfield:keyword-style keyword-style-hash-prefix)
  732. (set-reader-option! bitfield:hungry-eol-escapes? 1)
  733. (next))
  734. ((eq? sym 'fold-case)
  735. (set-reader-option! bitfield:case-insensitive? 1)
  736. (next))
  737. ((eq? sym 'no-fold-case)
  738. (set-reader-option! bitfield:case-insensitive? 0)
  739. (next))
  740. ((eq? sym 'curly-infix)
  741. (set-reader-option! bitfield:curly-infix? 1)
  742. (next))
  743. ((eq? sym 'curly-infix-and-bracket-lists)
  744. (set-reader-option! bitfield:curly-infix? 1)
  745. (set-reader-option! bitfield:square-brackets? 0)
  746. (next))
  747. (else
  748. (skip-scsh-comment)))))
  749. (define (skip-eol-comment)
  750. (let ((ch (next)))
  751. (cond
  752. ((eof-object? ch) ch)
  753. ((eq? ch #\newline) (next))
  754. (else (skip-eol-comment)))))
  755. ;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
  756. ;; nested.
  757. (define (skip-r6rs-block-comment)
  758. ;; We have read #|, now looking for |#.
  759. (let ((ch (next)))
  760. (when (eof-object? ch)
  761. (error "unterminated `#| ... |#' comment"))
  762. (cond
  763. ((and (eqv? ch #\|) (eqv? (peek) #\#))
  764. ;; Done.
  765. (next)
  766. (values))
  767. ((and (eqv? ch #\#) (eqv? (peek) #\|))
  768. ;; A nested comment.
  769. (next)
  770. (skip-r6rs-block-comment)
  771. (skip-r6rs-block-comment))
  772. (else
  773. (skip-r6rs-block-comment)))))
  774. (define (read-subexpression what)
  775. (let ((ch (next-non-whitespace)))
  776. (when (eof-object? ch)
  777. (error (string-append "unexpected end of input while reading " what)))
  778. (read-expr ch)))
  779. (define (next-non-whitespace)
  780. (let lp ((ch (next)))
  781. (case ch
  782. ((#\;)
  783. (lp (skip-eol-comment)))
  784. ((#\#)
  785. (case (peek)
  786. ((#\!)
  787. (next)
  788. (lp (process-shebang)))
  789. ((#\;)
  790. (next)
  791. (read-subexpression "#; comment")
  792. (next-non-whitespace))
  793. ((#\|)
  794. (if (read-hash-procedure #\|)
  795. ch
  796. (begin
  797. (next)
  798. (skip-r6rs-block-comment)
  799. (next-non-whitespace))))
  800. (else ch)))
  801. ((#\space #\return #\ff #\newline #\tab)
  802. (next-non-whitespace))
  803. (else ch))))
  804. (let ((ch (next-non-whitespace)))
  805. (if (eof-object? ch)
  806. ch
  807. (read-expr ch))))
  808. (define* (read #:optional (port (current-input-port)))
  809. (define filename (port-filename port))
  810. (define annotate
  811. (if (memq 'positions (read-options))
  812. (lambda (line column datum)
  813. (when (and (supports-source-properties? datum)
  814. ;; Line or column can be invalid via
  815. ;; set-port-column! or ungetting chars beyond start
  816. ;; of line.
  817. (<= 0 line)
  818. (<= 1 column))
  819. ;; We always capture the column after one char of lookahead;
  820. ;; subtract off that lookahead value.
  821. (set-source-properties! datum
  822. `((filename . ,filename)
  823. (line . ,line)
  824. (column . ,(1- column)))))
  825. datum)
  826. (lambda (line column datum)
  827. datum)))
  828. (%read port annotate identity))
  829. (define* (read-syntax #:optional (port (current-input-port)))
  830. (define filename (port-filename port))
  831. (define (annotate line column datum)
  832. ;; Usually when reading compound expressions consisting of multiple
  833. ;; syntax objects, like lists, the "leaves" of the expression are
  834. ;; annotated but the "root" isn't. Like in (A . B), A and B will be
  835. ;; annotated but the pair won't. Therefore the usually correct
  836. ;; thing to do is to just annotate the result. However in the case
  837. ;; of reading ( . C), the result is the already annotated C, which
  838. ;; we don't want to re-annotate. Therefore we avoid re-annotating
  839. ;; already annotated objects.
  840. (if (syntax? datum)
  841. datum
  842. (datum->syntax #f ; No lexical context.
  843. datum
  844. #:source (vector filename line (1- column)))))
  845. (%read port annotate syntax->datum))