read.scm 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789
  1. ;;; R7RS (scheme read) library
  2. ;;; Copyright (C) 2024 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; R7RS (scheme read) implementation
  18. ;;;
  19. ;;; Code:
  20. (library (hoot read)
  21. (export read string->number)
  22. (import (only (hoot primitives)
  23. define* define-syntax-rule
  24. define let let* and or case cond else lambda set! => _ ... if quote begin when unless
  25. values)
  26. (hoot char)
  27. (hoot match)
  28. (hoot bitvectors)
  29. (hoot keywords)
  30. (hoot strings)
  31. (hoot symbols)
  32. (hoot values)
  33. (hoot vectors)
  34. (hoot eq)
  35. (hoot errors)
  36. (hoot exceptions)
  37. (hoot pairs)
  38. (hoot lists)
  39. (hoot not)
  40. (hoot numbers)
  41. (hoot ports))
  42. (define* (string->number str #:optional (radix 10))
  43. (let ((port (open-input-string str)))
  44. (define (read-bin-digit)
  45. (case (peek-char port)
  46. ((#\0 #\1)
  47. (- (char->integer (read-char port)) (char->integer #\0)))
  48. (else #f)))
  49. (define (read-oct-digit)
  50. (case (peek-char port)
  51. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)
  52. (- (char->integer (read-char port)) (char->integer #\0)))
  53. (else #f)))
  54. (define (read-dec-digit)
  55. (case (peek-char port)
  56. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
  57. (- (char->integer (read-char port)) (char->integer #\0)))
  58. (else #f)))
  59. (define (read-hex-digit)
  60. (case (peek-char port)
  61. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
  62. (- (char->integer (read-char port)) (char->integer #\0)))
  63. ((#\a #\b #\c #\d #\e #\f)
  64. (+ 10 (- (char->integer (read-char port)) (char->integer #\a))))
  65. ((#\A #\B #\C #\D #\E #\F)
  66. (+ 10 (- (char->integer (read-char port)) (char->integer #\A))))
  67. (else #f)))
  68. (define (read-unsigned-int radix)
  69. (case radix
  70. ((2)
  71. (let ((x (read-bin-digit)))
  72. (and x
  73. (let loop ((x x))
  74. (let ((y (read-bin-digit)))
  75. (if y (loop (+ (* x 2) y)) x))))))
  76. ((8)
  77. (let ((x (read-oct-digit)))
  78. (and x
  79. (let loop ((x x))
  80. (let ((y (read-oct-digit)))
  81. (if y (loop (+ (* x 8) y)) x))))))
  82. ((10)
  83. (let ((x (read-dec-digit)))
  84. (and x
  85. (let loop ((x x))
  86. (let ((y (read-dec-digit)))
  87. (if y (loop (+ (* x 10) y)) x))))))
  88. ((16)
  89. (let ((x (read-hex-digit)))
  90. (and x
  91. (let loop ((x x))
  92. (let ((y (read-hex-digit)))
  93. (if y (loop (+ (* x 16) y)) x))))))))
  94. (define (read-sign)
  95. (let ((ch (peek-char port)))
  96. (cond
  97. ((eof-object? ch) #f)
  98. ((eqv? ch #\+)
  99. (read-char port)
  100. '+)
  101. ((eqv? ch #\-)
  102. (read-char port)
  103. '-)
  104. (else 'none))))
  105. (define (read-decimal n exactness)
  106. (case (peek-char port)
  107. ;; Decimal point
  108. ((#\.)
  109. (read-char port)
  110. (let ((ch (peek-char port)))
  111. ;; '0.' is a valid number, but '.' is not. 'n' being #f
  112. ;; signals the latter case.
  113. (if (eof-object? ch)
  114. (and n (inexact n))
  115. (let ((n (or n 0))
  116. (x (read-dec-digit)))
  117. (and x
  118. (let loop ((i -2) (x (* x (expt 10 -1))))
  119. (let ((y (read-dec-digit)))
  120. (if y
  121. (loop (- i 1) (+ x (* y (expt 10 i))))
  122. (let ((z (+ n x)))
  123. (or (read-decimal z exactness)
  124. (if (eq? exactness 'exact)
  125. z
  126. (inexact z))))))))))))
  127. ;; Exponent
  128. ((#\e #\E)
  129. (read-char port)
  130. (let* ((sign (read-sign))
  131. (x (read-unsigned-int 10)))
  132. (and x
  133. (let ((y (* n (expt 10 (if (eq? sign '-) (- x) x)))))
  134. (if (eq? exactness 'exact) y (inexact y))))))
  135. (else #f)))
  136. (define (read-unsigned radix exactness)
  137. (let ((ch (peek-char port)))
  138. (cond
  139. ((eof-object? ch) #f)
  140. ;; NaN
  141. ((or (eqv? ch #\n) (eqv? ch #\N))
  142. (read-char port)
  143. (case (read-char port)
  144. ((#\a #\A)
  145. (case (read-char port)
  146. ((#\n #\N)
  147. (case (read-char port)
  148. ((#\.)
  149. (case (read-char port)
  150. ((#\0) +nan.0)
  151. (else #f)))
  152. (else #f)))
  153. (else #f)))
  154. (else #f)))
  155. ;; Infinity
  156. ((or (eqv? ch #\i) (eqv? ch #\I))
  157. (read-char port)
  158. (let ((ch (peek-char port)))
  159. (cond
  160. ;; This might be a valid complex number, either '+i' or
  161. ;; '-i', so back up a char so the caller can check for
  162. ;; that case.
  163. ((eof-object? ch)
  164. (seek port -1 'cur)
  165. #f)
  166. ((or (eqv? ch #\n) (eqv? ch #\N))
  167. (read-char port)
  168. (case (read-char port)
  169. ((#\f #\F)
  170. (case (read-char port)
  171. ((#\.)
  172. (case (read-char port)
  173. ((#\0) +inf.0)
  174. (else #f)))
  175. (else #f)))
  176. (else #f)))
  177. (else #f))))
  178. ;; Decimal with no leading digits.
  179. ((eqv? ch #\.)
  180. (and (eqv? radix 10) (read-decimal #f exactness)))
  181. (else
  182. (let ((x (read-unsigned-int radix)))
  183. (and x
  184. (case (peek-char port)
  185. ;; Fraction
  186. ((#\/)
  187. (read-char port)
  188. (let ((y (read-unsigned-int radix)))
  189. (and y
  190. (let ((z (/ x y)))
  191. (if (eq? exactness 'inexact) (inexact z) z)))))
  192. ;; Decimal point or exponent
  193. ((#\. #\e #\E)
  194. (and (eqv? radix 10) (read-decimal x exactness)))
  195. (else
  196. (if (eq? exactness 'inexact) (inexact x) x)))))))))
  197. (define (read-complex radix exactness)
  198. (let ((sign (read-sign)))
  199. (and sign
  200. (let ((x (read-unsigned radix exactness)))
  201. (cond
  202. ((or (and (not x) (eq? sign 'none))
  203. ;; Infinities and NaNs need explicit sign.
  204. (and x (or (infinite? x) (nan? x)) (eq? sign 'none)))
  205. #f)
  206. ;; +i and -i cases.
  207. ((not x)
  208. (let ((ch (read-char port)))
  209. (and (or (eqv? ch #\i) (eqv? ch #\I))
  210. (if (eq? sign '+) +i -i))))
  211. ;; We've successfully read one real, now to check for
  212. ;; a polar or imaginary part.
  213. (else
  214. (let ((x (if (eq? sign '-) (- x) x)))
  215. (let ((ch (peek-char port)))
  216. (cond
  217. ((eof-object? ch) x)
  218. ;; Complex number in polar form.
  219. ((eqv? ch #\@)
  220. (read-char port)
  221. (let* ((sign (read-sign))
  222. (y (read-unsigned radix exactness)))
  223. (and y (make-polar x (if (eq? sign '-) (- y) y)))))
  224. ;; Complex number in rectangular form.
  225. ((or (eqv? ch #\+) (eqv? ch #\-))
  226. (let ((sign (read-sign))
  227. (y (or (read-unsigned radix exactness) 1.0)))
  228. (case (read-char port)
  229. ((#\i #\I)
  230. (make-rectangular x (if (eq? sign '-) (- y) y)))
  231. (else #f))))
  232. (else #f))))))))))
  233. (define (read-number)
  234. ;; First, read the radix and exactness prefix. These could be
  235. ;; specified in either order (like #x#e or #e#x), one could be
  236. ;; omitted (just #x or #e), or both could be omitted. When
  237. ;; exactness is omitted, exactness becomes implicit. For
  238. ;; example, '1.2' will produce an inexact value.
  239. (let loop ((radix* #f) (exactness #f))
  240. (let ((ch (peek-char port)))
  241. (cond
  242. ((eof-object? ch) #f)
  243. ((eqv? ch #\#)
  244. (read-char port)
  245. (let ((ch (read-char port)))
  246. (cond
  247. ((and (or (eqv? ch #\b) (eqv? ch #\B)) (not radix*))
  248. (loop 2 exactness))
  249. ((and (or (eqv? ch #\o) (eqv? ch #\O)) (not radix*))
  250. (loop 8 exactness))
  251. ((and (or (eqv? ch #\d) (eqv? ch #\D)) (not radix*))
  252. (loop 10 exactness))
  253. ((and (or (eqv? ch #\x) (eqv? ch #\X)) (not radix*))
  254. (loop 16 exactness))
  255. ((and (or (eqv? ch #\e) (eqv? ch #\E)) (not exactness))
  256. (loop radix* 'exact))
  257. ((and (or (eqv? ch #\i) (eqv? ch #\I)) (not exactness))
  258. (loop radix* 'inexact))
  259. (else #f))))
  260. (else
  261. (read-complex (or radix* radix) exactness))))))
  262. (let ((x (read-number)))
  263. ;; Input should be completely consumed at this point.
  264. (and (eof-object? (peek-char port)) x))))
  265. (define* (read #:optional (port (current-input-port)))
  266. ;; For read-syntax, we'd define these annotate / strip functions
  267. ;; differently, to create syntax objects instead.
  268. (define (annotate line column datum) datum)
  269. (define (strip-annotation datum) datum)
  270. (define fold-case? (%port-fold-case? port))
  271. (define (set-fold-case?! val)
  272. (set! fold-case? val)
  273. (%set-port-fold-case?! port val))
  274. (define (next) (read-char port))
  275. (define (peek) (peek-char port))
  276. ;; We are only ever interested in whether an object is a char or not.
  277. (define (eof-object? x) (not (char? x)))
  278. (define (input-error msg args)
  279. (raise
  280. (make-exception (make-lexical-violation)
  281. (make-exception-with-origin "read")
  282. (make-exception-with-message msg)
  283. (make-exception-with-irritants args)
  284. (make-i/o-filename-error (port-filename port))
  285. (make-i/o-line-and-column-error (1+ (port-line port))
  286. (1+ (port-column port))))))
  287. (define-syntax-rule (error msg arg ...)
  288. (let ((args (list arg ...)))
  289. (input-error msg args)))
  290. (define (read-semicolon-comment)
  291. (let ((ch (next)))
  292. (cond
  293. ((eof-object? ch) ch)
  294. ((eqv? ch #\newline) (next))
  295. (else (read-semicolon-comment)))))
  296. (define-syntax-rule (take-until first pred)
  297. (let ((p (open-output-string)))
  298. (write-char first p)
  299. (let lp ()
  300. (let ((ch (peek)))
  301. (if (or (eof-object? ch) (pred ch))
  302. (get-output-string p)
  303. (begin
  304. (write-char ch p)
  305. (next)
  306. (lp)))))))
  307. (define-syntax-rule (take-while first pred)
  308. (take-until first (lambda (ch) (not (pred ch)))))
  309. (define (delimiter? ch)
  310. (case ch
  311. ((#\( #\) #\; #\" #\space #\return #\ff #\newline #\tab #\[ #\]) #t)
  312. (else #f)))
  313. (define (read-token ch)
  314. (take-until ch delimiter?))
  315. (define (read-mixed-case-symbol ch)
  316. (let ((str (read-token ch)))
  317. (string->symbol (if fold-case? (string-downcase str) str))))
  318. (define (read-parenthesized rdelim)
  319. (let lp ((ch (next-non-whitespace)))
  320. (when (eof-object? ch)
  321. (error "unexpected end of input while searching for: ~A"
  322. rdelim))
  323. (cond
  324. ((eqv? ch rdelim) '())
  325. ((or (eqv? ch #\)) (eqv? ch #\]))
  326. (error "mismatched close paren: ~A" ch))
  327. (else
  328. (let ((expr (read-expr ch)))
  329. ;; Note that it is possible for scm_read_expression to
  330. ;; return `.', but not as part of a dotted pair: as in
  331. ;; #{.}#. Indeed an example is here!
  332. (if (and (eqv? ch #\.) (eq? (strip-annotation expr) '#{.}#))
  333. (let* ((tail (read-subexpression "tail of improper list"))
  334. (close (next-non-whitespace)))
  335. (unless (eqv? close rdelim)
  336. (error "missing close paren: ~A" close))
  337. tail)
  338. (cons expr (lp (next-non-whitespace)))))))))
  339. (define (hex-digit ch)
  340. (case ch
  341. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
  342. (- (char->integer ch) (char->integer #\0)))
  343. ((#\a #\b #\c #\d #\e #\f)
  344. (+ 10 (- (char->integer ch) (char->integer #\a))))
  345. ((#\A #\B #\C #\D #\E #\F)
  346. (+ 10 (- (char->integer ch) (char->integer #\A))))
  347. (else #f)))
  348. (define (read-r6rs-hex-escape)
  349. (let ((ch (next)))
  350. (cond
  351. ((hex-digit ch) =>
  352. (lambda (res)
  353. (let lp ((res res))
  354. (let ((ch (next)))
  355. (cond
  356. ((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
  357. ((eqv? ch #\;) (integer->char res))
  358. ((eof-object? ch)
  359. (error "unexpected end of input in character escape sequence"))
  360. (else
  361. (error "invalid character in escape sequence: ~S" ch)))))))
  362. ((eof-object? ch)
  363. (error "unexpected end of input in character escape sequence"))
  364. (else
  365. (error "invalid character in escape sequence: ~S" ch)))))
  366. (define (read-fixed-hex-escape len)
  367. (let lp ((len len) (res 0))
  368. (if (zero? len)
  369. (integer->char res)
  370. (let ((ch (next)))
  371. (cond
  372. ((hex-digit ch) =>
  373. (lambda (digit)
  374. (lp (1- len) (+ (* res 16) digit))))
  375. ((eof-object? ch)
  376. (error "unexpected end of input in character escape sequence"))
  377. (else
  378. (error "invalid character in escape sequence: ~S" ch)))))))
  379. (define (char-intraline-whitespace? ch)
  380. ;; True for tab and for codepoints whose general category is Zs.
  381. (case ch
  382. ((#\tab #\space
  383. #\240 #\13200
  384. #\20000 #\20001 #\20002 #\20003 #\20004 #\20005
  385. #\20006 #\20007 #\20010 #\20011 #\20012
  386. #\20057
  387. #\20137
  388. #\30000) #t)
  389. (else #f)))
  390. (define (read-string rdelim)
  391. (let ((out (open-output-string)))
  392. (let lp ()
  393. (let ((ch (next)))
  394. (cond
  395. ((eof-object? ch)
  396. (error "unexpected end of input while reading string"))
  397. ((eqv? ch rdelim)
  398. (get-output-string out))
  399. ((eqv? ch #\\)
  400. (let ((ch (next)))
  401. (when (eof-object? ch)
  402. (error "unexpected end of input while reading string"))
  403. (cond
  404. ((eqv? ch #\newline)
  405. ;; Skip intraline whitespace before continuing.
  406. (let skip ()
  407. (let ((ch (peek)))
  408. (when (and (not (eof-object? ch))
  409. (char-intraline-whitespace? ch))
  410. (next)
  411. (skip))))
  412. (lp))
  413. ((eqv? ch rdelim)
  414. (write-char rdelim out)
  415. (lp))
  416. (else
  417. (write-char
  418. (case ch
  419. ;; Accept "\(" for use at the beginning of
  420. ;; lines in multiline strings to avoid
  421. ;; confusing emacs lisp modes.
  422. ((#\| #\\ #\() ch)
  423. ((#\0) #\nul)
  424. ((#\f) #\ff)
  425. ((#\n) #\newline)
  426. ((#\r) #\return)
  427. ((#\t) #\tab)
  428. ((#\a) #\alarm)
  429. ((#\v) #\vtab)
  430. ((#\b) #\backspace)
  431. ;; When faced with the choice between Guile's old
  432. ;; two-char \xHH escapes and R6RS \xHHH...;
  433. ;; escapes, prefer R6RS; \xHH is of limited
  434. ;; utility.
  435. ((#\x) (read-r6rs-hex-escape))
  436. ((#\u) (read-fixed-hex-escape 4))
  437. ((#\U) (read-fixed-hex-escape 6))
  438. (else
  439. (error "invalid character in escape sequence: ~S" ch)))
  440. out)
  441. (lp)))))
  442. (else
  443. (write-char ch out)
  444. (lp)))))))
  445. (define (read-character)
  446. (let ((ch (next)))
  447. (cond
  448. ((eof-object? ch)
  449. (error "unexpected end of input after #\\"))
  450. ((delimiter? ch)
  451. ch)
  452. (else
  453. (let* ((tok (read-token ch))
  454. (len (string-length tok)))
  455. (define dotted-circle #\x25cc)
  456. (define r5rs-charnames
  457. '(("space" . #\x20) ("newline" . #\x0a)))
  458. (define r6rs-charnames
  459. '(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
  460. ("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
  461. ("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
  462. ("delete" . #\x7f)))
  463. (define r7rs-charnames
  464. '(("escape" . #\x1b)))
  465. (define C0-control-charnames
  466. '(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
  467. ("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
  468. ("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08)
  469. ("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b)
  470. ("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e)
  471. ("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
  472. ("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
  473. ("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
  474. ("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a)
  475. ("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d)
  476. ("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20)
  477. ("del" . #\x7f)))
  478. (define alt-charnames
  479. '(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
  480. ;; Although R6RS and R7RS charnames specified as being
  481. ;; case-sensitive, Guile matches them case-insensitively, like
  482. ;; other char names.
  483. (define (named-char tok alist)
  484. (let ((tok (string-downcase tok)))
  485. (let lp ((alist alist))
  486. (match alist
  487. (() #f)
  488. (((name . ch) . alist)
  489. (if (string=? name tok) ch (lp alist)))))))
  490. (cond
  491. ((= len 1) ch)
  492. ((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
  493. ;; Ignore dotted circles, which may be used to keep
  494. ;; combining characters from combining with the backslash in
  495. ;; #\charname.
  496. ch)
  497. ((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7))
  498. (string->number tok 8))
  499. ;; Specifying a codepoint as an octal value.
  500. => integer->char)
  501. ((and (eqv? ch #\x) (> len 1)
  502. (string->number (string-copy tok 1) 16))
  503. ;; Specifying a codepoint as an hexadecimal value. Skip
  504. ;; initial "x".
  505. => integer->char)
  506. ((named-char tok r5rs-charnames))
  507. ((named-char tok r6rs-charnames))
  508. ((named-char tok r7rs-charnames))
  509. ((named-char tok C0-control-charnames))
  510. ((named-char tok alt-charnames))
  511. (else
  512. (error "unknown character name ~a" tok))))))))
  513. (define (read-vector)
  514. (list->vector (map strip-annotation (read-parenthesized #\)))))
  515. (define (read-bytevector)
  516. (define (expect ch)
  517. (unless (eqv? (next) ch)
  518. (error "invalid bytevector prefix" ch)))
  519. (expect #\u)
  520. (expect #\8)
  521. (expect #\()
  522. (let ((p (open-output-bytevector)))
  523. (for-each (lambda (datum) (write-u8 (strip-annotation datum) p))
  524. (read-parenthesized #\)))
  525. (get-output-bytevector p)))
  526. ;; FIXME: We should require a terminating delimiter.
  527. (define (read-bitvector)
  528. (let lp ((bits '()) (len 0))
  529. (let ((ch (peek)))
  530. (case ch
  531. ((#\0) (next) (lp bits (1+ len)))
  532. ((#\1) (next) (lp (cons len bits) (1+ len)))
  533. (else
  534. (let ((bv (make-bitvector len #f)))
  535. (for-each (lambda (bit) (bitvector-set-bit! bv bit)) bits)
  536. bv))))))
  537. (define (read-true)
  538. (match (peek)
  539. ((or (? eof-object?) (? delimiter?))
  540. #t)
  541. (_ (match (read-token #\t)
  542. ((? (lambda (tok) (string=? (string-downcase tok) "true"))) #t)
  543. (tok (error "unexpected input when reading #true" tok))))))
  544. (define (read-false)
  545. (match (peek)
  546. ((or (? eof-object?) (? delimiter?))
  547. #f)
  548. (_ (match (string-downcase (read-token #\f))
  549. ((? (lambda (tok) (string=? (string-downcase tok) "false"))) #f)
  550. (tok (error "unexpected input when reading #false" tok))))))
  551. (define (read-keyword)
  552. (let ((expr (strip-annotation (read-subexpression "keyword"))))
  553. (unless (symbol? expr)
  554. (error "keyword prefix #: not followed by a symbol: ~a" expr))
  555. (symbol->keyword expr)))
  556. (define (read-number-and-radix ch)
  557. (let ((tok (string-append "#" (read-token ch))))
  558. (or (string->number tok)
  559. (error "unknown # object: ~S" tok))))
  560. (define (read-extended-symbol)
  561. (define (next-not-eof)
  562. (let ((ch (next)))
  563. (when (eof-object? ch)
  564. (error "end of input while reading symbol"))
  565. ch))
  566. (let ((out (open-output-string)))
  567. (let lp ((saw-brace? #f))
  568. (let lp/inner ((ch (next-not-eof))
  569. (saw-brace? saw-brace?))
  570. (cond
  571. (saw-brace?
  572. (unless (eqv? ch #\#)
  573. ;; Don't eat CH, see
  574. ;; <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=49623>.
  575. (write-char #\} out)
  576. (lp/inner ch #f)))
  577. ((eqv? ch #\})
  578. (lp #t))
  579. ((eqv? ch #\\)
  580. ;; \xH+; => R6RS hex escape
  581. ;; \C => C otherwise, for any C
  582. (let* ((ch (next-not-eof))
  583. (ch (if (eqv? ch #\x)
  584. (read-r6rs-hex-escape)
  585. ch)))
  586. (write-char ch out)
  587. (lp #f)))
  588. (else
  589. (write-char ch out)
  590. (lp #f)))))
  591. (string->symbol (get-output-string out))))
  592. (define (read-nil)
  593. ;; Have already read "#\n" -- now read "il".
  594. (match (read-mixed-case-symbol #\n)
  595. ('nil #nil)
  596. (id (error "unexpected input while reading #nil: ~a" id))))
  597. (define (read-sharp)
  598. (let* ((ch (next)))
  599. (cond
  600. ((eof-object? ch)
  601. (error "unexpected end of input after #"))
  602. (else
  603. (case ch
  604. ((#\\) (read-character))
  605. ((#\() (read-vector))
  606. ((#\v) (read-bytevector))
  607. ((#\*) (read-bitvector))
  608. ((#\f #\F) (read-false))
  609. ((#\t #\T) (read-true))
  610. ((#\:) (read-keyword))
  611. ((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
  612. (read-number-and-radix ch))
  613. ((#\{) (read-extended-symbol))
  614. ((#\') (list 'syntax (read-subexpression "syntax expression")))
  615. ((#\`) (list 'quasisyntax
  616. (read-subexpression "quasisyntax expression")))
  617. ((#\,)
  618. (if (eqv? #\@ (peek))
  619. (begin
  620. (next)
  621. (list 'unsyntax-splicing
  622. (read-subexpression "unsyntax-splicing expression")))
  623. (list 'unsyntax (read-subexpression "unsyntax expression"))))
  624. ((#\n) (read-nil))
  625. (else
  626. (error "Unknown # object: ~S" (string #\# ch))))))))
  627. (define (read-number ch)
  628. (let ((str (read-token ch)))
  629. (or (string->number str)
  630. (string->symbol (if fold-case? (string-downcase str) str)))))
  631. (define (read-expr* ch)
  632. (case ch
  633. ((#\[) (read-parenthesized #\]))
  634. ((#\() (read-parenthesized #\)))
  635. ((#\") (read-string ch))
  636. ((#\|) (string->symbol (read-string ch)))
  637. ((#\') (list 'quote (read-subexpression "quoted expression")))
  638. ((#\`) (list 'quasiquote (read-subexpression "quasiquoted expression")))
  639. ((#\,) (cond
  640. ((eqv? #\@ (peek))
  641. (next)
  642. (list 'unquote-splicing (read-subexpression "subexpression of ,@")))
  643. (else
  644. (list 'unquote (read-subexpression "unquoted expression")))))
  645. ;; FIXME: read-sharp should recur if we read a comment
  646. ((#\#) (read-sharp))
  647. ((#\)) (error "unexpected \")\""))
  648. ((#\]) (error "unexpected \"]\""))
  649. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.) (read-number ch))
  650. (else (read-mixed-case-symbol ch))))
  651. (define (read-expr ch)
  652. (annotate (port-line port)
  653. (port-column port)
  654. (read-expr* ch)))
  655. (define (read-directive)
  656. (define (directive-char? ch)
  657. (and (char? ch)
  658. (or (eqv? ch #\-)
  659. (char-alphabetic? ch)
  660. (char-numeric? ch))))
  661. (let ((ch (peek)))
  662. (cond
  663. ((directive-char? ch)
  664. (next)
  665. (string->symbol (take-while ch directive-char?)))
  666. (else
  667. #f))))
  668. (define (skip-scsh-comment)
  669. (let lp ((ch (next)))
  670. (cond
  671. ((eof-object? ch)
  672. (error "unterminated `#! ... !#' comment"))
  673. ((eqv? ch #\!)
  674. (let ((ch (next)))
  675. (if (eqv? ch #\#)
  676. (next)
  677. (lp ch))))
  678. (else
  679. (lp (next))))))
  680. (define (process-shebang)
  681. ;; After having read #!, we complete either with #!r6rs,
  682. ;; #!fold-case, #!no-fold-case, or a SCSH block comment terminated
  683. ;; by !#.
  684. (match (read-directive)
  685. ('fold-case
  686. (set-fold-case?! #t)
  687. (next))
  688. ((or 'no-fold-case 'r6rs)
  689. (set-fold-case?! #f)
  690. (next))
  691. (_
  692. (skip-scsh-comment))))
  693. (define (skip-eol-comment)
  694. (let ((ch (next)))
  695. (cond
  696. ((eof-object? ch) ch)
  697. ((eq? ch #\newline) (next))
  698. (else (skip-eol-comment)))))
  699. ;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
  700. ;; nested.
  701. (define (skip-r6rs-block-comment)
  702. ;; We have read #|, now looking for |#.
  703. (let ((ch (next)))
  704. (when (eof-object? ch)
  705. (error "unterminated `#| ... |#' comment"))
  706. (cond
  707. ((and (eqv? ch #\|) (eqv? (peek) #\#))
  708. ;; Done.
  709. (next)
  710. (values))
  711. ((and (eqv? ch #\#) (eqv? (peek) #\|))
  712. ;; A nested comment.
  713. (next)
  714. (skip-r6rs-block-comment)
  715. (skip-r6rs-block-comment))
  716. (else
  717. (skip-r6rs-block-comment)))))
  718. (define (read-subexpression what)
  719. (let ((ch (next-non-whitespace)))
  720. (when (eof-object? ch)
  721. (error (string-append "unexpected end of input while reading " what)))
  722. (read-expr ch)))
  723. (define (next-non-whitespace)
  724. (let lp ((ch (next)))
  725. (case ch
  726. ((#\;)
  727. (lp (skip-eol-comment)))
  728. ((#\#)
  729. (case (peek)
  730. ((#\!)
  731. (next)
  732. (lp (process-shebang)))
  733. ((#\;)
  734. (next)
  735. (read-subexpression "#; comment")
  736. (next-non-whitespace))
  737. ((#\|)
  738. (next)
  739. (skip-r6rs-block-comment)
  740. (next-non-whitespace))
  741. (else ch)))
  742. ((#\space #\return #\ff #\newline #\tab)
  743. (next-non-whitespace))
  744. (else ch))))
  745. (let ((ch (next-non-whitespace)))
  746. (if (eof-object? ch)
  747. ch
  748. (read-expr ch)))))