read.scm 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807
  1. ;;; R7RS (scheme read) library
  2. ;;; Copyright (C) 2024 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; R7RS (scheme read) implementation
  18. ;;;
  19. ;;; Code:
  20. (library (hoot read)
  21. (export read read-syntax string->number)
  22. (import (hoot bitvectors)
  23. (hoot char)
  24. (hoot eq)
  25. (hoot errors)
  26. (hoot exceptions)
  27. (hoot keywords)
  28. (hoot lists)
  29. (hoot match)
  30. (hoot not)
  31. (hoot numbers)
  32. (hoot pairs)
  33. (hoot ports)
  34. (hoot strings)
  35. (hoot symbols)
  36. (hoot syntax)
  37. (hoot syntax-objects)
  38. (hoot values)
  39. (hoot vectors))
  40. (define* (string->number str #:optional (radix 10))
  41. (let ((port (open-input-string str)))
  42. (define (read-bin-digit)
  43. (case (peek-char port)
  44. ((#\0 #\1)
  45. (- (char->integer (read-char port)) (char->integer #\0)))
  46. (else #f)))
  47. (define (read-oct-digit)
  48. (case (peek-char port)
  49. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7)
  50. (- (char->integer (read-char port)) (char->integer #\0)))
  51. (else #f)))
  52. (define (read-dec-digit)
  53. (case (peek-char port)
  54. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
  55. (- (char->integer (read-char port)) (char->integer #\0)))
  56. (else #f)))
  57. (define (read-hex-digit)
  58. (case (peek-char port)
  59. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
  60. (- (char->integer (read-char port)) (char->integer #\0)))
  61. ((#\a #\b #\c #\d #\e #\f)
  62. (+ 10 (- (char->integer (read-char port)) (char->integer #\a))))
  63. ((#\A #\B #\C #\D #\E #\F)
  64. (+ 10 (- (char->integer (read-char port)) (char->integer #\A))))
  65. (else #f)))
  66. (define (read-unsigned-int radix)
  67. (case radix
  68. ((2)
  69. (let ((x (read-bin-digit)))
  70. (and x
  71. (let loop ((x x))
  72. (let ((y (read-bin-digit)))
  73. (if y (loop (+ (* x 2) y)) x))))))
  74. ((8)
  75. (let ((x (read-oct-digit)))
  76. (and x
  77. (let loop ((x x))
  78. (let ((y (read-oct-digit)))
  79. (if y (loop (+ (* x 8) y)) x))))))
  80. ((10)
  81. (let ((x (read-dec-digit)))
  82. (and x
  83. (let loop ((x x))
  84. (let ((y (read-dec-digit)))
  85. (if y (loop (+ (* x 10) y)) x))))))
  86. ((16)
  87. (let ((x (read-hex-digit)))
  88. (and x
  89. (let loop ((x x))
  90. (let ((y (read-hex-digit)))
  91. (if y (loop (+ (* x 16) y)) x))))))))
  92. (define (read-sign)
  93. (let ((ch (peek-char port)))
  94. (cond
  95. ((eof-object? ch) #f)
  96. ((eqv? ch #\+)
  97. (read-char port)
  98. '+)
  99. ((eqv? ch #\-)
  100. (read-char port)
  101. '-)
  102. (else 'none))))
  103. (define (read-decimal n exactness)
  104. (case (peek-char port)
  105. ;; Decimal point
  106. ((#\.)
  107. (read-char port)
  108. (let ((ch (peek-char port)))
  109. ;; '0.' is a valid number, but '.' is not. 'n' being #f
  110. ;; signals the latter case.
  111. (if (eof-object? ch)
  112. (and n (inexact n))
  113. (let ((n (or n 0))
  114. (x (read-dec-digit)))
  115. (and x
  116. (let loop ((i -2) (x (* x (expt 10 -1))))
  117. (let ((y (read-dec-digit)))
  118. (if y
  119. (loop (- i 1) (+ x (* y (expt 10 i))))
  120. (let ((z (+ n x)))
  121. (or (read-decimal z exactness)
  122. (if (eq? exactness 'exact)
  123. z
  124. (inexact z))))))))))))
  125. ;; Exponent
  126. ((#\e #\E)
  127. (read-char port)
  128. (let* ((sign (read-sign))
  129. (x (read-unsigned-int 10)))
  130. (and x
  131. (let ((y (* n (expt 10 (if (eq? sign '-) (- x) x)))))
  132. (if (eq? exactness 'exact) y (inexact y))))))
  133. (else #f)))
  134. (define (read-unsigned radix exactness)
  135. (let ((ch (peek-char port)))
  136. (cond
  137. ((eof-object? ch) #f)
  138. ;; NaN
  139. ((or (eqv? ch #\n) (eqv? ch #\N))
  140. (read-char port)
  141. (case (read-char port)
  142. ((#\a #\A)
  143. (case (read-char port)
  144. ((#\n #\N)
  145. (case (read-char port)
  146. ((#\.)
  147. (case (read-char port)
  148. ((#\0) +nan.0)
  149. (else #f)))
  150. (else #f)))
  151. (else #f)))
  152. (else #f)))
  153. ;; Infinity
  154. ((or (eqv? ch #\i) (eqv? ch #\I))
  155. (read-char port)
  156. (let ((ch (peek-char port)))
  157. (cond
  158. ;; This might be a valid complex number, either '+i' or
  159. ;; '-i', so back up a char so the caller can check for
  160. ;; that case.
  161. ((eof-object? ch)
  162. (seek port -1 'cur)
  163. #f)
  164. ((or (eqv? ch #\n) (eqv? ch #\N))
  165. (read-char port)
  166. (case (read-char port)
  167. ((#\f #\F)
  168. (case (read-char port)
  169. ((#\.)
  170. (case (read-char port)
  171. ((#\0) +inf.0)
  172. (else #f)))
  173. (else #f)))
  174. (else #f)))
  175. (else #f))))
  176. ;; Decimal with no leading digits.
  177. ((eqv? ch #\.)
  178. (and (eqv? radix 10) (read-decimal #f exactness)))
  179. (else
  180. (let ((x (read-unsigned-int radix)))
  181. (and x
  182. (case (peek-char port)
  183. ;; Fraction
  184. ((#\/)
  185. (read-char port)
  186. (let ((y (read-unsigned-int radix)))
  187. (and y
  188. (let ((z (/ x y)))
  189. (if (eq? exactness 'inexact) (inexact z) z)))))
  190. ;; Decimal point or exponent
  191. ((#\. #\e #\E)
  192. (and (eqv? radix 10) (read-decimal x exactness)))
  193. (else
  194. (if (eq? exactness 'inexact) (inexact x) x)))))))))
  195. (define (read-complex radix exactness)
  196. (let ((sign (read-sign)))
  197. (and sign
  198. (let ((x (read-unsigned radix exactness)))
  199. (cond
  200. ((or (and (not x) (eq? sign 'none))
  201. ;; Infinities and NaNs need explicit sign.
  202. (and x (or (infinite? x) (nan? x)) (eq? sign 'none)))
  203. #f)
  204. ;; +i and -i cases.
  205. ((not x)
  206. (let ((ch (read-char port)))
  207. (and (or (eqv? ch #\i) (eqv? ch #\I))
  208. (if (eq? sign '+) +i -i))))
  209. ;; We've successfully read one real, now to check for
  210. ;; a polar or imaginary part.
  211. (else
  212. (let ((x (if (eq? sign '-) (- x) x)))
  213. (let ((ch (peek-char port)))
  214. (cond
  215. ((eof-object? ch) x)
  216. ;; Complex number in polar form.
  217. ((eqv? ch #\@)
  218. (read-char port)
  219. (let* ((sign (read-sign))
  220. (y (read-unsigned radix exactness)))
  221. (and y (make-polar x (if (eq? sign '-) (- y) y)))))
  222. ;; Complex number in rectangular form.
  223. ((or (eqv? ch #\+) (eqv? ch #\-))
  224. (let ((sign (read-sign))
  225. (y (or (read-unsigned radix exactness) 1.0)))
  226. (case (read-char port)
  227. ((#\i #\I)
  228. (make-rectangular x (if (eq? sign '-) (- y) y)))
  229. (else #f))))
  230. (else #f))))))))))
  231. (define (read-number)
  232. ;; First, read the radix and exactness prefix. These could be
  233. ;; specified in either order (like #x#e or #e#x), one could be
  234. ;; omitted (just #x or #e), or both could be omitted. When
  235. ;; exactness is omitted, exactness becomes implicit. For
  236. ;; example, '1.2' will produce an inexact value.
  237. (let loop ((radix* #f) (exactness #f))
  238. (let ((ch (peek-char port)))
  239. (cond
  240. ((eof-object? ch) #f)
  241. ((eqv? ch #\#)
  242. (read-char port)
  243. (let ((ch (read-char port)))
  244. (cond
  245. ((and (or (eqv? ch #\b) (eqv? ch #\B)) (not radix*))
  246. (loop 2 exactness))
  247. ((and (or (eqv? ch #\o) (eqv? ch #\O)) (not radix*))
  248. (loop 8 exactness))
  249. ((and (or (eqv? ch #\d) (eqv? ch #\D)) (not radix*))
  250. (loop 10 exactness))
  251. ((and (or (eqv? ch #\x) (eqv? ch #\X)) (not radix*))
  252. (loop 16 exactness))
  253. ((and (or (eqv? ch #\e) (eqv? ch #\E)) (not exactness))
  254. (loop radix* 'exact))
  255. ((and (or (eqv? ch #\i) (eqv? ch #\I)) (not exactness))
  256. (loop radix* 'inexact))
  257. (else #f))))
  258. (else
  259. (read-complex (or radix* radix) exactness))))))
  260. (let ((x (read-number)))
  261. ;; Input should be completely consumed at this point.
  262. (and (eof-object? (peek-char port)) x))))
  263. (define (%read port annotate strip-annotation)
  264. (define fold-case? (%port-fold-case? port))
  265. (define (set-fold-case?! val)
  266. (set! fold-case? val)
  267. (%set-port-fold-case?! port val))
  268. (define (next) (read-char port))
  269. (define (peek) (peek-char port))
  270. ;; We are only ever interested in whether an object is a char or not.
  271. (define (eof-object? x) (not (char? x)))
  272. (define (input-error msg args)
  273. (raise
  274. (make-exception (make-lexical-violation)
  275. (make-exception-with-origin "read")
  276. (make-exception-with-message msg)
  277. (make-exception-with-irritants args)
  278. (make-i/o-filename-error (port-filename port))
  279. (make-i/o-line-and-column-error (1+ (port-line port))
  280. (1+ (port-column port))))))
  281. (define-syntax-rule (error msg arg ...)
  282. (let ((args (list arg ...)))
  283. (input-error msg args)))
  284. (define (read-semicolon-comment)
  285. (let ((ch (next)))
  286. (cond
  287. ((eof-object? ch) ch)
  288. ((eqv? ch #\newline) (next))
  289. (else (read-semicolon-comment)))))
  290. (define-syntax-rule (take-until first pred)
  291. (let ((p (open-output-string)))
  292. (write-char first p)
  293. (let lp ()
  294. (let ((ch (peek)))
  295. (if (or (eof-object? ch) (pred ch))
  296. (get-output-string p)
  297. (begin
  298. (write-char ch p)
  299. (next)
  300. (lp)))))))
  301. (define-syntax-rule (take-while first pred)
  302. (take-until first (lambda (ch) (not (pred ch)))))
  303. (define (delimiter? ch)
  304. (case ch
  305. ((#\( #\) #\; #\" #\space #\return #\ff #\newline #\tab #\[ #\]) #t)
  306. (else #f)))
  307. (define (read-token ch)
  308. (take-until ch delimiter?))
  309. (define (read-mixed-case-symbol ch)
  310. (let ((str (read-token ch)))
  311. (string->symbol (if fold-case? (string-downcase str) str))))
  312. (define (read-parenthesized rdelim)
  313. (let lp ((ch (next-non-whitespace)))
  314. (when (eof-object? ch)
  315. (error "unexpected end of input while searching for: ~A"
  316. rdelim))
  317. (cond
  318. ((eqv? ch rdelim) '())
  319. ((or (eqv? ch #\)) (eqv? ch #\]))
  320. (error "mismatched close paren: ~A" ch))
  321. (else
  322. (let ((expr (read-expr ch)))
  323. ;; Note that it is possible for scm_read_expression to
  324. ;; return `.', but not as part of a dotted pair: as in
  325. ;; #{.}#. Indeed an example is here!
  326. (if (and (eqv? ch #\.) (eq? (strip-annotation expr) '#{.}#))
  327. (let* ((tail (read-subexpression "tail of improper list"))
  328. (close (next-non-whitespace)))
  329. (unless (eqv? close rdelim)
  330. (error "missing close paren: ~A" close))
  331. tail)
  332. (cons expr (lp (next-non-whitespace)))))))))
  333. (define (hex-digit ch)
  334. (case ch
  335. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
  336. (- (char->integer ch) (char->integer #\0)))
  337. ((#\a #\b #\c #\d #\e #\f)
  338. (+ 10 (- (char->integer ch) (char->integer #\a))))
  339. ((#\A #\B #\C #\D #\E #\F)
  340. (+ 10 (- (char->integer ch) (char->integer #\A))))
  341. (else #f)))
  342. (define (read-r6rs-hex-escape)
  343. (let ((ch (next)))
  344. (cond
  345. ((hex-digit ch) =>
  346. (lambda (res)
  347. (let lp ((res res))
  348. (let ((ch (next)))
  349. (cond
  350. ((hex-digit ch) => (lambda (digit) (lp (+ (* 16 res) digit))))
  351. ((eqv? ch #\;) (integer->char res))
  352. ((eof-object? ch)
  353. (error "unexpected end of input in character escape sequence"))
  354. (else
  355. (error "invalid character in escape sequence: ~S" ch)))))))
  356. ((eof-object? ch)
  357. (error "unexpected end of input in character escape sequence"))
  358. (else
  359. (error "invalid character in escape sequence: ~S" ch)))))
  360. (define (read-fixed-hex-escape len)
  361. (let lp ((len len) (res 0))
  362. (if (zero? len)
  363. (integer->char res)
  364. (let ((ch (next)))
  365. (cond
  366. ((hex-digit ch) =>
  367. (lambda (digit)
  368. (lp (1- len) (+ (* res 16) digit))))
  369. ((eof-object? ch)
  370. (error "unexpected end of input in character escape sequence"))
  371. (else
  372. (error "invalid character in escape sequence: ~S" ch)))))))
  373. (define (char-intraline-whitespace? ch)
  374. ;; True for tab and for codepoints whose general category is Zs.
  375. (case ch
  376. ((#\tab #\space
  377. #\240 #\13200
  378. #\20000 #\20001 #\20002 #\20003 #\20004 #\20005
  379. #\20006 #\20007 #\20010 #\20011 #\20012
  380. #\20057
  381. #\20137
  382. #\30000) #t)
  383. (else #f)))
  384. (define (read-string rdelim)
  385. (let ((out (open-output-string)))
  386. (let lp ()
  387. (let ((ch (next)))
  388. (cond
  389. ((eof-object? ch)
  390. (error "unexpected end of input while reading string"))
  391. ((eqv? ch rdelim)
  392. (get-output-string out))
  393. ((eqv? ch #\\)
  394. (let ((ch (next)))
  395. (when (eof-object? ch)
  396. (error "unexpected end of input while reading string"))
  397. (cond
  398. ((eqv? ch #\newline)
  399. ;; Skip intraline whitespace before continuing.
  400. (let skip ()
  401. (let ((ch (peek)))
  402. (when (and (not (eof-object? ch))
  403. (char-intraline-whitespace? ch))
  404. (next)
  405. (skip))))
  406. (lp))
  407. ((eqv? ch rdelim)
  408. (write-char rdelim out)
  409. (lp))
  410. (else
  411. (write-char
  412. (case ch
  413. ;; Accept "\(" for use at the beginning of
  414. ;; lines in multiline strings to avoid
  415. ;; confusing emacs lisp modes.
  416. ((#\| #\\ #\() ch)
  417. ((#\0) #\nul)
  418. ((#\f) #\ff)
  419. ((#\n) #\newline)
  420. ((#\r) #\return)
  421. ((#\t) #\tab)
  422. ((#\a) #\alarm)
  423. ((#\v) #\vtab)
  424. ((#\b) #\backspace)
  425. ;; When faced with the choice between Guile's old
  426. ;; two-char \xHH escapes and R6RS \xHHH...;
  427. ;; escapes, prefer R6RS; \xHH is of limited
  428. ;; utility.
  429. ((#\x) (read-r6rs-hex-escape))
  430. ((#\u) (read-fixed-hex-escape 4))
  431. ((#\U) (read-fixed-hex-escape 6))
  432. (else
  433. (error "invalid character in escape sequence: ~S" ch)))
  434. out)
  435. (lp)))))
  436. (else
  437. (write-char ch out)
  438. (lp)))))))
  439. (define (read-character)
  440. (let ((ch (next)))
  441. (cond
  442. ((eof-object? ch)
  443. (error "unexpected end of input after #\\"))
  444. ((delimiter? ch)
  445. ch)
  446. (else
  447. (let* ((tok (read-token ch))
  448. (len (string-length tok)))
  449. (define dotted-circle #\x25cc)
  450. (define r5rs-charnames
  451. '(("space" . #\x20) ("newline" . #\x0a)))
  452. (define r6rs-charnames
  453. '(("nul" . #\x00) ("alarm" . #\x07) ("backspace" . #\x08)
  454. ("tab" . #\x09) ("linefeed" . #\x0a) ("vtab" . #\x0b)
  455. ("page" . #\x0c) ("return" . #\x0d) ("esc" . #\x1b)
  456. ("delete" . #\x7f)))
  457. (define r7rs-charnames
  458. '(("escape" . #\x1b)))
  459. (define C0-control-charnames
  460. '(("nul" . #\x00) ("soh" . #\x01) ("stx" . #\x02)
  461. ("etx" . #\x03) ("eot" . #\x04) ("enq" . #\x05)
  462. ("ack" . #\x06) ("bel" . #\x07) ("bs" . #\x08)
  463. ("ht" . #\x09) ("lf" . #\x0a) ("vt" . #\x0b)
  464. ("ff" . #\x0c) ("cr" . #\x0d) ("so" . #\x0e)
  465. ("si" . #\x0f) ("dle" . #\x10) ("dc1" . #\x11)
  466. ("dc2" . #\x12) ("dc3" . #\x13) ("dc4" . #\x14)
  467. ("nak" . #\x15) ("syn" . #\x16) ("etb" . #\x17)
  468. ("can" . #\x18) ("em" . #\x19) ("sub" . #\x1a)
  469. ("esc" . #\x1b) ("fs" . #\x1c) ("gs" . #\x1d)
  470. ("rs" . #\x1e) ("us" . #\x1f) ("sp" . #\x20)
  471. ("del" . #\x7f)))
  472. (define alt-charnames
  473. '(("null" . #\x0) ("nl" . #\x0a) ("np" . #\x0c)))
  474. ;; Although R6RS and R7RS charnames specified as being
  475. ;; case-sensitive, Guile matches them case-insensitively, like
  476. ;; other char names.
  477. (define (named-char tok alist)
  478. (let ((tok (string-downcase tok)))
  479. (let lp ((alist alist))
  480. (match alist
  481. (() #f)
  482. (((name . ch) . alist)
  483. (if (string=? name tok) ch (lp alist)))))))
  484. (cond
  485. ((= len 1) ch)
  486. ((and (= len 2) (eqv? (string-ref tok 1) dotted-circle))
  487. ;; Ignore dotted circles, which may be used to keep
  488. ;; combining characters from combining with the backslash in
  489. ;; #\charname.
  490. ch)
  491. ((and (<= (char->integer #\0) (char->integer ch) (char->integer #\7))
  492. (string->number tok 8))
  493. ;; Specifying a codepoint as an octal value.
  494. => integer->char)
  495. ((and (eqv? ch #\x) (> len 1)
  496. (string->number (string-copy tok 1) 16))
  497. ;; Specifying a codepoint as an hexadecimal value. Skip
  498. ;; initial "x".
  499. => integer->char)
  500. ((named-char tok r5rs-charnames))
  501. ((named-char tok r6rs-charnames))
  502. ((named-char tok r7rs-charnames))
  503. ((named-char tok C0-control-charnames))
  504. ((named-char tok alt-charnames))
  505. (else
  506. (error "unknown character name ~a" tok))))))))
  507. (define (read-vector)
  508. (list->vector (map strip-annotation (read-parenthesized #\)))))
  509. (define (read-bytevector)
  510. (define (expect ch)
  511. (unless (eqv? (next) ch)
  512. (error "invalid bytevector prefix" ch)))
  513. (expect #\u)
  514. (expect #\8)
  515. (expect #\()
  516. (let ((p (open-output-bytevector)))
  517. (for-each (lambda (datum) (write-u8 (strip-annotation datum) p))
  518. (read-parenthesized #\)))
  519. (get-output-bytevector p)))
  520. ;; FIXME: We should require a terminating delimiter.
  521. (define (read-bitvector)
  522. (let lp ((bits '()) (len 0))
  523. (let ((ch (peek)))
  524. (case ch
  525. ((#\0) (next) (lp bits (1+ len)))
  526. ((#\1) (next) (lp (cons len bits) (1+ len)))
  527. (else
  528. (let ((bv (make-bitvector len #f)))
  529. (for-each (lambda (bit) (bitvector-set-bit! bv bit)) bits)
  530. bv))))))
  531. (define (read-true)
  532. (match (peek)
  533. ((or (? eof-object?) (? delimiter?))
  534. #t)
  535. (_ (match (read-token #\t)
  536. ((? (lambda (tok) (string=? (string-downcase tok) "true"))) #t)
  537. (tok (error "unexpected input when reading #true" tok))))))
  538. (define (read-false)
  539. (match (peek)
  540. ((or (? eof-object?) (? delimiter?))
  541. #f)
  542. (_ (match (string-downcase (read-token #\f))
  543. ((? (lambda (tok) (string=? (string-downcase tok) "false"))) #f)
  544. (tok (error "unexpected input when reading #false" tok))))))
  545. (define (read-keyword)
  546. (let ((expr (strip-annotation (read-subexpression "keyword"))))
  547. (unless (symbol? expr)
  548. (error "keyword prefix #: not followed by a symbol: ~a" expr))
  549. (symbol->keyword expr)))
  550. (define (read-number-and-radix ch)
  551. (let ((tok (string-append "#" (read-token ch))))
  552. (or (string->number tok)
  553. (error "unknown # object: ~S" tok))))
  554. (define (read-extended-symbol)
  555. (define (next-not-eof)
  556. (let ((ch (next)))
  557. (when (eof-object? ch)
  558. (error "end of input while reading symbol"))
  559. ch))
  560. (let ((out (open-output-string)))
  561. (let lp ((saw-brace? #f))
  562. (let lp/inner ((ch (next-not-eof))
  563. (saw-brace? saw-brace?))
  564. (cond
  565. (saw-brace?
  566. (unless (eqv? ch #\#)
  567. ;; Don't eat CH, see
  568. ;; <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=49623>.
  569. (write-char #\} out)
  570. (lp/inner ch #f)))
  571. ((eqv? ch #\})
  572. (lp #t))
  573. ((eqv? ch #\\)
  574. ;; \xH+; => R6RS hex escape
  575. ;; \C => C otherwise, for any C
  576. (let* ((ch (next-not-eof))
  577. (ch (if (eqv? ch #\x)
  578. (read-r6rs-hex-escape)
  579. ch)))
  580. (write-char ch out)
  581. (lp #f)))
  582. (else
  583. (write-char ch out)
  584. (lp #f)))))
  585. (string->symbol (get-output-string out))))
  586. (define (read-nil)
  587. ;; Have already read "#\n" -- now read "il".
  588. (match (read-mixed-case-symbol #\n)
  589. ('nil #nil)
  590. (id (error "unexpected input while reading #nil: ~a" id))))
  591. (define (read-sharp)
  592. (let* ((ch (next)))
  593. (cond
  594. ((eof-object? ch)
  595. (error "unexpected end of input after #"))
  596. (else
  597. (case ch
  598. ((#\\) (read-character))
  599. ((#\() (read-vector))
  600. ((#\v) (read-bytevector))
  601. ((#\*) (read-bitvector))
  602. ((#\f #\F) (read-false))
  603. ((#\t #\T) (read-true))
  604. ((#\:) (read-keyword))
  605. ((#\i #\e #\b #\B #\o #\O #\d #\D #\x #\X #\I #\E)
  606. (read-number-and-radix ch))
  607. ((#\{) (read-extended-symbol))
  608. ((#\') (list 'syntax (read-subexpression "syntax expression")))
  609. ((#\`) (list 'quasisyntax
  610. (read-subexpression "quasisyntax expression")))
  611. ((#\,)
  612. (if (eqv? #\@ (peek))
  613. (begin
  614. (next)
  615. (list 'unsyntax-splicing
  616. (read-subexpression "unsyntax-splicing expression")))
  617. (list 'unsyntax (read-subexpression "unsyntax expression"))))
  618. ((#\n) (read-nil))
  619. (else
  620. (error "Unknown # object: ~S" (string #\# ch))))))))
  621. (define (read-number ch)
  622. (let ((str (read-token ch)))
  623. (or (string->number str)
  624. (string->symbol (if fold-case? (string-downcase str) str)))))
  625. (define (read-expr* ch)
  626. (case ch
  627. ((#\[) (read-parenthesized #\]))
  628. ((#\() (read-parenthesized #\)))
  629. ((#\") (read-string ch))
  630. ((#\|) (string->symbol (read-string ch)))
  631. ((#\') (list 'quote (read-subexpression "quoted expression")))
  632. ((#\`) (list 'quasiquote (read-subexpression "quasiquoted expression")))
  633. ((#\,) (cond
  634. ((eqv? #\@ (peek))
  635. (next)
  636. (list 'unquote-splicing (read-subexpression "subexpression of ,@")))
  637. (else
  638. (list 'unquote (read-subexpression "unquoted expression")))))
  639. ;; FIXME: read-sharp should recur if we read a comment
  640. ((#\#) (read-sharp))
  641. ((#\)) (error "unexpected \")\""))
  642. ((#\]) (error "unexpected \"]\""))
  643. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9 #\+ #\- #\.) (read-number ch))
  644. (else (read-mixed-case-symbol ch))))
  645. (define (read-expr ch)
  646. (annotate (port-line port)
  647. (port-column port)
  648. (read-expr* ch)))
  649. (define (read-directive)
  650. (define (directive-char? ch)
  651. (and (char? ch)
  652. (or (eqv? ch #\-)
  653. (char-alphabetic? ch)
  654. (char-numeric? ch))))
  655. (let ((ch (peek)))
  656. (cond
  657. ((directive-char? ch)
  658. (next)
  659. (string->symbol (take-while ch directive-char?)))
  660. (else
  661. #f))))
  662. (define (skip-scsh-comment)
  663. (let lp ((ch (next)))
  664. (cond
  665. ((eof-object? ch)
  666. (error "unterminated `#! ... !#' comment"))
  667. ((eqv? ch #\!)
  668. (let ((ch (next)))
  669. (if (eqv? ch #\#)
  670. (next)
  671. (lp ch))))
  672. (else
  673. (lp (next))))))
  674. (define (process-shebang)
  675. ;; After having read #!, we complete either with #!r6rs,
  676. ;; #!fold-case, #!no-fold-case, or a SCSH block comment terminated
  677. ;; by !#.
  678. (match (read-directive)
  679. ('fold-case
  680. (set-fold-case?! #t)
  681. (next))
  682. ((or 'no-fold-case 'r6rs)
  683. (set-fold-case?! #f)
  684. (next))
  685. (_
  686. (skip-scsh-comment))))
  687. (define (skip-eol-comment)
  688. (let ((ch (next)))
  689. (cond
  690. ((eof-object? ch) ch)
  691. ((eq? ch #\newline) (next))
  692. (else (skip-eol-comment)))))
  693. ;; Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
  694. ;; nested.
  695. (define (skip-r6rs-block-comment)
  696. ;; We have read #|, now looking for |#.
  697. (let ((ch (next)))
  698. (when (eof-object? ch)
  699. (error "unterminated `#| ... |#' comment"))
  700. (cond
  701. ((and (eqv? ch #\|) (eqv? (peek) #\#))
  702. ;; Done.
  703. (next)
  704. (values))
  705. ((and (eqv? ch #\#) (eqv? (peek) #\|))
  706. ;; A nested comment.
  707. (next)
  708. (skip-r6rs-block-comment)
  709. (skip-r6rs-block-comment))
  710. (else
  711. (skip-r6rs-block-comment)))))
  712. (define (read-subexpression what)
  713. (let ((ch (next-non-whitespace)))
  714. (when (eof-object? ch)
  715. (error (string-append "unexpected end of input while reading " what)))
  716. (read-expr ch)))
  717. (define (next-non-whitespace)
  718. (let lp ((ch (next)))
  719. (case ch
  720. ((#\;)
  721. (lp (skip-eol-comment)))
  722. ((#\#)
  723. (case (peek)
  724. ((#\!)
  725. (next)
  726. (lp (process-shebang)))
  727. ((#\;)
  728. (next)
  729. (read-subexpression "#; comment")
  730. (next-non-whitespace))
  731. ((#\|)
  732. (next)
  733. (skip-r6rs-block-comment)
  734. (next-non-whitespace))
  735. (else ch)))
  736. ((#\space #\return #\ff #\newline #\tab)
  737. (next-non-whitespace))
  738. (else ch))))
  739. (let ((ch (next-non-whitespace)))
  740. (if (eof-object? ch)
  741. ch
  742. (read-expr ch))))
  743. (define* (read #:optional (port (current-input-port)))
  744. ;; For read-syntax, we'd define these annotate / strip functions
  745. ;; differently, to create syntax objects instead.
  746. (define (annotate line column datum) datum)
  747. (define (strip-annotation datum) datum)
  748. (%read port annotate strip-annotation))
  749. (define* (read-syntax #:optional (port (current-input-port)))
  750. (define filename (port-filename port))
  751. (define (annotate line column datum)
  752. ;; Usually when reading compound expressions consisting of multiple
  753. ;; syntax objects, like lists, the "leaves" of the expression are
  754. ;; annotated but the "root" isn't. Like in (A . B), A and B will be
  755. ;; annotated but the pair won't. Therefore the usually correct
  756. ;; thing to do is to just annotate the result. However in the case
  757. ;; of reading ( . C), the result is the already annotated C, which
  758. ;; we don't want to re-annotate. Therefore we avoid re-annotating
  759. ;; already annotated objects.
  760. (if (syntax? datum)
  761. datum
  762. (datum->syntax #f ; No lexical context.
  763. datum
  764. #:source (vector filename line (1- column)))))
  765. (%read port annotate syntax->datum)))