read.scm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. ; -*- Mode: Scheme; Syntax: Scheme; Package: Scheme; -*-
  2. ; Part of Scheme 48 1.9. See file COPYING for notices and license.
  3. ; Authors: Richard Kelsey, Jonathan Rees, Mike Sperber
  4. ; A little Scheme reader.
  5. ; Nonstandard things used:
  6. ; ASCII stuff: ascii-whitespaces
  7. ; (for dispatch table; portable definition in alt/ascii.scm)
  8. ; Unicode: char->scalar-value, scalar-value->char
  9. ; reverse-list->string -- ok to define as follows:
  10. ; (define (reverse-list->string l n)
  11. ; (list->string (reverse l)))
  12. ; make-immutable! -- ok to define as follows:
  13. ; (define (make-immutable! x) x)
  14. ; signal (only for use by reading-error; easily excised)
  15. (define (read . port-option)
  16. (let ((port (input-port-option port-option)))
  17. (let loop ()
  18. (let ((form (sub-read port)))
  19. (cond ((not (reader-token? form))
  20. form)
  21. ((eq? form close-paren)
  22. ;; Too many right parens.
  23. (warning 'read "discarding extraneous right parenthesis" port)
  24. (loop))
  25. (else
  26. (reading-error port (cdr form))))))))
  27. (define (sub-read-carefully port)
  28. (let ((form (sub-read port)))
  29. (cond ((eof-object? form)
  30. (reading-error port "unexpected end of file"))
  31. ((reader-token? form) (reading-error port (cdr form)))
  32. (else form))))
  33. (define reader-token-marker (list 'reader-token))
  34. (define (make-reader-token message) (cons reader-token-marker message))
  35. (define (reader-token? form)
  36. (and (pair? form) (eq? (car form) reader-token-marker)))
  37. (define close-paren (make-reader-token "unexpected right parenthesis"))
  38. (define dot (make-reader-token "unexpected \" . \""))
  39. ; Main dispatch
  40. (define *dispatch-table-limit* 128)
  41. (define read-dispatch-vector
  42. (make-vector *dispatch-table-limit*
  43. (lambda (c port)
  44. (reading-error port "illegal character read" c))))
  45. (define read-terminating?-vector
  46. (make-vector *dispatch-table-limit* #t))
  47. (define (set-standard-syntax! char terminating? reader)
  48. (vector-set! read-dispatch-vector (char->scalar-value char) reader)
  49. (vector-set! read-terminating?-vector (char->scalar-value char) terminating?))
  50. (define (sub-read port)
  51. (let ((c (read-char port)))
  52. (if (eof-object? c)
  53. c
  54. (let ((scalar-value (char->scalar-value c)))
  55. (cond
  56. ((< scalar-value *dispatch-table-limit*)
  57. ((vector-ref read-dispatch-vector (char->scalar-value c))
  58. c port))
  59. ((char-alphabetic? c)
  60. (sub-read-constituent c port))
  61. (else
  62. (reading-error port "illegal character read" c)))))))
  63. (let ((sub-read-whitespace
  64. (lambda (c port)
  65. c ;ignored
  66. (sub-read port))))
  67. (for-each (lambda (c)
  68. (vector-set! read-dispatch-vector c sub-read-whitespace))
  69. ascii-whitespaces))
  70. (define (sub-read-constituent c port)
  71. (parse-token (sub-read-token c port) port))
  72. (for-each (lambda (c)
  73. (set-standard-syntax! c #f sub-read-constituent))
  74. (string->list
  75. (string-append "!$%&*+-./0123456789:<=>?@^_~ABCDEFGHIJKLM"
  76. "NOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")))
  77. ; Usual read macros
  78. (define (set-standard-read-macro! c terminating? proc)
  79. (set-standard-syntax! c terminating? proc))
  80. (define (sub-read-list c port)
  81. (let ((form (sub-read port)))
  82. (if (eq? form dot)
  83. (reading-error port
  84. "missing car -- ( immediately followed by .")
  85. (let recur ((form form))
  86. (cond ((eof-object? form)
  87. (reading-error port
  88. "end of file inside list -- unbalanced parentheses"))
  89. ((eq? form close-paren) '())
  90. ((eq? form dot)
  91. (let* ((last-form (sub-read-carefully port))
  92. (another-form (sub-read port)))
  93. (if (eq? another-form close-paren)
  94. last-form
  95. (reading-error port
  96. "randomness after form after dot"
  97. another-form))))
  98. (else
  99. (cons form (recur (sub-read port)))))))))
  100. (set-standard-read-macro! #\( #t sub-read-list)
  101. (set-standard-read-macro! #\) #t
  102. (lambda (c port)
  103. c port
  104. close-paren))
  105. (set-standard-read-macro! #\' #t
  106. (lambda (c port)
  107. c
  108. (list 'quote (sub-read-carefully port))))
  109. (set-standard-read-macro! #\` #t
  110. (lambda (c port)
  111. c
  112. (list 'quasiquote (sub-read-carefully port))))
  113. (set-standard-read-macro! #\, #t
  114. (lambda (c port)
  115. c
  116. (let* ((next (peek-char port))
  117. ;; DO NOT beta-reduce!
  118. (keyword (cond ((eof-object? next)
  119. (reading-error port "end of file after ,"))
  120. ((char=? next #\@)
  121. (read-char port)
  122. 'unquote-splicing)
  123. (else 'unquote))))
  124. (list keyword
  125. (sub-read-carefully port)))))
  126. ; Don't use non-R5RS char literals to avoid bootstrap circularities
  127. (define *nul* (scalar-value->char 0))
  128. (define *alarm* (scalar-value->char 7))
  129. (define *backspace* (scalar-value->char 8))
  130. (define *tab* (scalar-value->char 9))
  131. (define *linefeed* (scalar-value->char 10))
  132. (define *vtab* (scalar-value->char 11))
  133. (define *page* (scalar-value->char 12))
  134. (define *return* (scalar-value->char 13))
  135. (define *escape* (scalar-value->char 27))
  136. (define *rubout* (scalar-value->char 127))
  137. (set-standard-read-macro! #\" #t
  138. (lambda (c port)
  139. c ;ignored
  140. (let loop ((l '()) (i 0))
  141. (let ((c (read-char port)))
  142. (cond ((eof-object? c)
  143. (reading-error port "end of file within a string"))
  144. ((char=? c #\\)
  145. (cond
  146. ((decode-escape port)
  147. => (lambda (e)
  148. (loop (cons e l) (+ i 1))))
  149. (else (loop l i))))
  150. ((char=? c #\")
  151. (reverse-list->string l i))
  152. (else
  153. (loop (cons c l) (+ i 1))))))))
  154. (define (decode-escape port)
  155. (let ((c (read-char port)))
  156. (if (eof-object? c)
  157. (reading-error port "end of file within a string"))
  158. (let ((scalar-value (char->scalar-value c)))
  159. (cond
  160. ((or (char=? c #\\) (char=? c #\"))
  161. c)
  162. ((char=? c #\newline)
  163. ;; SRFI 75; skip intra-line whitespace
  164. (let loop ()
  165. (let ((c (peek-char port)))
  166. (cond
  167. ((eof-object? c)
  168. (reading-error port "end of file within a string"))
  169. ((char-unicode-whitespace? c)
  170. (read-char port)
  171. (loop))
  172. (else #f)))))
  173. ;; SRFI 75
  174. ((char=? c #\a) *alarm*)
  175. ((char=? c #\b) *backspace*)
  176. ((char=? c #\t) *tab*)
  177. ((char=? c #\n) *linefeed*)
  178. ((char=? c #\v) *vtab*)
  179. ((char=? c #\f) *page*)
  180. ((char=? c #\r) *return*)
  181. ((char=? c #\e) *escape*)
  182. ((char=? c #\x)
  183. (let ((d (decode-hex-digits port char-semicolon? "string literal")))
  184. (read-char port) ; remove semicolon
  185. d))
  186. (else
  187. (reading-error port
  188. "invalid escaped character in string"
  189. c))))))
  190. (define (char-semicolon? c)
  191. (equal? c #\;))
  192. ; The \x syntax is shared between character and string literals
  193. ; This doesn't remove the delimiter from the port.
  194. (define (decode-hex-digits port delimiter? desc)
  195. (let loop ((rev-digits '()))
  196. (let ((c (peek-char port)))
  197. (cond
  198. ((delimiter? c)
  199. (scalar-value->char
  200. (string->number (list->string (reverse rev-digits)) 16)))
  201. ((eof-object? c)
  202. (reading-error
  203. port
  204. (string-append "premature end of a scalar-value literal within a " desc)))
  205. ((not (char-hex-digit? c))
  206. (reading-error port
  207. (string-append "invalid hex digit in a " desc)
  208. c))
  209. (else
  210. (read-char port)
  211. (loop (cons c rev-digits)))))))
  212. (define (char-hex-digit? c)
  213. (let ((scalar-value (char->scalar-value c)))
  214. (or (and (>= scalar-value 48) ; #\0
  215. (<= scalar-value 57)) ; #\9
  216. (and (>= scalar-value 65) ; #\A
  217. (<= scalar-value 70)) ; #\F
  218. (and (>= scalar-value 97) ; #\a
  219. (<= scalar-value 102))))) ; #\f
  220. (set-standard-read-macro! #\; #t
  221. (lambda (c port)
  222. c ;ignored
  223. (gobble-line port)
  224. (sub-read port)))
  225. (define (gobble-line port)
  226. (let loop ()
  227. (let ((c (read-char port)))
  228. (cond ((eof-object? c) c)
  229. ((char=? c #\newline) #f)
  230. (else (loop))))))
  231. (define *sharp-macros* '())
  232. (define (define-sharp-macro c proc)
  233. (set! *sharp-macros* (cons (cons c proc) *sharp-macros*)))
  234. (set-standard-read-macro! #\# #f
  235. (lambda (c port)
  236. c ;ignored
  237. (let* ((c (peek-char port))
  238. (c (if (eof-object? c)
  239. (reading-error port "end of file after #")
  240. (char-downcase c)))
  241. (probe (assq c *sharp-macros*)))
  242. (if probe
  243. ((cdr probe) c port)
  244. (reading-error port "unknown # syntax" c)))))
  245. (define-sharp-macro #\f
  246. (lambda (c port) (read-char port) #f))
  247. (define-sharp-macro #\t
  248. (lambda (c port) (read-char port) #t))
  249. ; These are from Matthew Flatt's Unicode proposal for R6RS
  250. ; See write.scm.
  251. ; Richard will hopefully provide a fancy version of this that provides
  252. ; all the names in the Unicode character database.
  253. (define *char-name-table*
  254. (list
  255. (cons 'space #\space)
  256. (cons 'newline #\newline)
  257. (cons 'nul *nul*)
  258. (cons 'alarm *alarm*)
  259. (cons 'backspace *backspace*)
  260. (cons 'tab *tab*)
  261. (cons 'linefeed *linefeed*)
  262. (cons 'vtab *vtab*)
  263. (cons 'page *page*)
  264. (cons 'return *return*)
  265. (cons 'escape *escape*)
  266. (cons 'rubout *rubout*)))
  267. (define-sharp-macro #\\
  268. (lambda (c port)
  269. (read-char port)
  270. (let ((c (peek-char port)))
  271. (cond ((eof-object? c)
  272. (reading-error port "end of file after #\\"))
  273. ((char=? #\x c)
  274. (read-char port)
  275. (if (delimiter? (peek-char port))
  276. c
  277. (decode-hex-digits port char-scalar-value-literal-delimiter? "char literal")))
  278. ((char-alphabetic? c)
  279. (let ((name (sub-read-carefully port)))
  280. (cond ((= (string-length (symbol->string name)) 1)
  281. c)
  282. ((assq name *char-name-table*)
  283. => cdr)
  284. (else
  285. (reading-error port "unknown #\\ name" name)))))
  286. (else
  287. (read-char port)
  288. c)))))
  289. (define (char-scalar-value-literal-delimiter? c)
  290. (or (eof-object? c)
  291. (delimiter? c)))
  292. (define-sharp-macro #\(
  293. (lambda (c port)
  294. (read-char port)
  295. (let ((elts (sub-read-list c port)))
  296. (if (proper-list? elts)
  297. (list->vector elts)
  298. (reading-error port "dot in #(...)")))))
  299. (define (proper-list? x)
  300. (cond ((null? x) #t)
  301. ((pair? x) (proper-list? (cdr x)))
  302. (else #f)))
  303. (let ((number-sharp-macro
  304. (lambda (c port)
  305. (let ((string (sub-read-token #\# port)))
  306. (or (string->number string)
  307. (reading-error port "unsupported number syntax" string))))))
  308. (for-each (lambda (c)
  309. (define-sharp-macro c number-sharp-macro))
  310. '(#\b #\o #\d #\x #\i #\e)))
  311. ; Tokens
  312. (define (sub-read-token c port)
  313. (let loop ((l (list (preferred-case c))) (n 1))
  314. (let ((c (peek-char port)))
  315. (cond
  316. ((eof-object? c)
  317. (reverse-list->string l n))
  318. ((char=? c #\\)
  319. (read-char port)
  320. (let ((c (peek-char port)))
  321. (cond
  322. ((or (eof-object? c)
  323. (not (char=? #\x c)))
  324. (reading-error port "invalid escape sequence in a symbol"
  325. c))
  326. (else
  327. (read-char port)
  328. (let ((d (decode-hex-digits port char-semicolon? "symbol literal")))
  329. (read-char port) ; remove semicolon
  330. (loop (cons d l) (+ n 1)))))))
  331. (else
  332. (let ((sv (char->scalar-value c)))
  333. (if (if (< sv *dispatch-table-limit*)
  334. (vector-ref read-terminating?-vector sv)
  335. (binary-search *non-symbol-constituents-above-127* sv))
  336. (reverse-list->string l n)
  337. (begin
  338. (read-char port)
  339. (loop (cons (preferred-case c) l)
  340. (+ n 1))))))))))
  341. (define (parse-token string port)
  342. (if (let ((c (string-ref string 0)))
  343. (or (char-numeric? c) (char=? c #\+) (char=? c #\-) (char=? c #\.)))
  344. (cond ((string->number string))
  345. ((member string strange-symbol-names)
  346. (string->symbol (make-immutable! string)))
  347. ((string=? string ".")
  348. dot)
  349. (else
  350. (reading-error port "unsupported number syntax" string)))
  351. (string->symbol (make-immutable! string))))
  352. (define strange-symbol-names
  353. '("+" "-" "..."
  354. "->" ;Only for JAR's thesis
  355. ))
  356. (define (delimiter? c)
  357. (or (char-unicode-whitespace? c)
  358. (char=? c #\))
  359. (char=? c #\()
  360. (char=? c #\")
  361. (char=? c #\;)))
  362. (define (char-unicode-whitespace? c)
  363. (binary-search *whitespaces* (char->scalar-value c)))
  364. ;--- This loses because the compiler won't in-line it.
  365. ; and it's in READ's inner loop.
  366. (define preferred-case
  367. (if (char=? (string-ref (symbol->string 't) 0) #\T)
  368. char-upcase
  369. char-downcase))
  370. ; For ASCII, we previously had this hand-hacked version,
  371. ; (define p-c-v (make-string ascii-limit #\0))
  372. ;
  373. ; (let ((p-c (if (char=? (string-ref (symbol->string 't) 0) #\T)
  374. ; char-upcase
  375. ; char-downcase)))
  376. ; (do ((i 0 (+ i 1)))
  377. ; ((>= i ascii-limit))
  378. ; (string-set! p-c-v i (p-c (ascii->char i)))))
  379. ;
  380. ; (define (preferred-case c)
  381. ; (string-ref p-c-v (char->ascii c)))
  382. ; Reader errors
  383. (define-condition-type &read-error &error
  384. make-read-error read-error?)
  385. (define (reading-error port message . irritants)
  386. (raise
  387. (condition
  388. (make-read-error)
  389. (make-i/o-port-error port)
  390. (make-who-condition 'read)
  391. (make-message-condition message)
  392. (make-irritants-condition irritants))))
  393. ; returns index of value (must be number) in vector
  394. (define (binary-search vec val)
  395. (let ((size (vector-length vec)))
  396. (let loop ((low 0) ; inclusive
  397. (high size)) ; exclusive
  398. (cond
  399. ((< low (- high 1))
  400. (let* ((pos (quotient (+ low high) 2)) ; always in
  401. (at (vector-ref vec pos)))
  402. (cond
  403. ((= val at) pos)
  404. ((< val at)
  405. (loop low pos))
  406. (else
  407. (loop pos high)))))
  408. ((< low high)
  409. (if (= val (vector-ref vec low))
  410. low
  411. #f))
  412. (else #f)))))