parser.scm 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. ;;; Guile Emacs Lisp
  2. ;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
  3. ;;;
  4. ;;; This library is free software; you can redistribute it and/or
  5. ;;; modify it under the terms of the GNU Lesser General Public
  6. ;;; License as published by the Free Software Foundation; either
  7. ;;; version 3 of the License, or (at your option) any later version.
  8. ;;;
  9. ;;; This library is distributed in the hope that it will be useful,
  10. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;;; Lesser General Public License for more details.
  13. ;;;
  14. ;;; You should have received a copy of the GNU Lesser General Public
  15. ;;; License along with this library; if not, write to the Free Software
  16. ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17. ;;; Code:
  18. (define-module (language elisp parser)
  19. #:use-module (language elisp lexer)
  20. #:export (read-elisp))
  21. ;;; The parser (reader) for elisp expressions.
  22. ;;;
  23. ;;; It is hand-written (just as the lexer is) instead of using some
  24. ;;; parser generator because this allows easier transfer of source
  25. ;;; properties from the lexer ((text parse-lalr) seems not to allow
  26. ;;; access to the original lexer token-pair) and is easy enough anyways.
  27. ;;; Report a parse error. The first argument is some current lexer
  28. ;;; token where source information is available should it be useful.
  29. (define (parse-error token msg . args)
  30. (apply error msg args))
  31. ;;; For parsing circular structures, we keep track of definitions in a
  32. ;;; hash-map that maps the id's to their values. When defining a new
  33. ;;; id, though, we immediatly fill the slot with a promise before
  34. ;;; parsing and setting the real value, because it must already be
  35. ;;; available at that time in case of a circular reference. The promise
  36. ;;; refers to a local variable that will be set when the real value is
  37. ;;; available through a closure. After parsing the expression is
  38. ;;; completed, we work through it again and force all promises we find.
  39. ;;; The definitions themselves are stored in a fluid and their scope is
  40. ;;; one call to read-elisp (but not only the currently parsed
  41. ;;; expression!).
  42. (define circular-definitions (make-fluid))
  43. (define (make-circular-definitions)
  44. (make-hash-table))
  45. (define (circular-ref token)
  46. (if (not (eq? (car token) 'circular-ref))
  47. (error "invalid token for circular-ref" token))
  48. (let* ((id (cdr token))
  49. (value (hashq-ref (fluid-ref circular-definitions) id)))
  50. (if value
  51. value
  52. (parse-error token "undefined circular reference" id))))
  53. ;;; Returned is a closure that, when invoked, will set the final value.
  54. ;;; This means both the variable the promise will return and the
  55. ;;; hash-table slot so we don't generate promises any longer.
  56. (define (circular-define! token)
  57. (if (not (eq? (car token) 'circular-def))
  58. (error "invalid token for circular-define!" token))
  59. (let ((value #f)
  60. (table (fluid-ref circular-definitions))
  61. (id (cdr token)))
  62. (hashq-set! table id (delay value))
  63. (lambda (real-value)
  64. (set! value real-value)
  65. (hashq-set! table id real-value))))
  66. ;;; Work through a parsed data structure and force the promises there.
  67. ;;; After a promise is forced, the resulting value must not be recursed
  68. ;;; on; this may lead to infinite recursion with a circular structure,
  69. ;;; and additionally this value was already processed when it was
  70. ;;; defined. All deep data structures that can be parsed must be
  71. ;;; handled here!
  72. (define (force-promises! data)
  73. (cond
  74. ((pair? data)
  75. (begin
  76. (if (promise? (car data))
  77. (set-car! data (force (car data)))
  78. (force-promises! (car data)))
  79. (if (promise? (cdr data))
  80. (set-cdr! data (force (cdr data)))
  81. (force-promises! (cdr data)))))
  82. ((vector? data)
  83. (let ((len (vector-length data)))
  84. (let iterate ((i 0))
  85. (if (< i len)
  86. (let ((el (vector-ref data i)))
  87. (if (promise? el)
  88. (vector-set! data i (force el))
  89. (force-promises! el))
  90. (iterate (1+ i)))))))
  91. ;; Else nothing needs to be done.
  92. ))
  93. ;;; We need peek-functionality for the next lexer token, this is done
  94. ;;; with some single token look-ahead storage. This is handled by a
  95. ;;; closure which allows getting or peeking the next token. When one
  96. ;;; expression is fully parsed, we don't want a look-ahead stored here
  97. ;;; because it would miss from future parsing. This is verified by the
  98. ;;; finish action.
  99. (define (make-lexer-buffer lex)
  100. (let ((look-ahead #f))
  101. (lambda (action)
  102. (if (eq? action 'finish)
  103. (if look-ahead
  104. (error "lexer-buffer is not empty when finished")
  105. #f)
  106. (begin
  107. (if (not look-ahead)
  108. (set! look-ahead (lex)))
  109. (case action
  110. ((peek) look-ahead)
  111. ((get)
  112. (let ((result look-ahead))
  113. (set! look-ahead #f)
  114. result))
  115. (else (error "invalid lexer-buffer action" action))))))))
  116. ;;; Get the contents of a list, where the opening parentheses has
  117. ;;; already been found. The same code is used for vectors and lists,
  118. ;;; where lists allow the dotted tail syntax and vectors not;
  119. ;;; additionally, the closing parenthesis must of course match. The
  120. ;;; implementation here is not tail-recursive, but I think it is clearer
  121. ;;; and simpler this way.
  122. (define (get-list lex allow-dot close-square)
  123. (let* ((next (lex 'peek))
  124. (type (car next)))
  125. (cond
  126. ((eq? type (if close-square 'square-close 'paren-close))
  127. (begin
  128. (if (not (eq? (car (lex 'get)) type))
  129. (error "got different token than peeked"))
  130. '()))
  131. ((and allow-dot (eq? type 'dot))
  132. (begin
  133. (if (not (eq? (car (lex 'get)) type))
  134. (error "got different token than peeked"))
  135. (let ((tail (get-list lex #f close-square)))
  136. (if (not (= (length tail) 1))
  137. (parse-error next
  138. "expected exactly one element after dot"))
  139. (car tail))))
  140. (else
  141. ;; Do both parses in exactly this sequence!
  142. (let* ((head (get-expression lex))
  143. (tail (get-list lex allow-dot close-square)))
  144. (cons head tail))))))
  145. ;;; Parse a single expression from a lexer-buffer. This is the main
  146. ;;; routine in our recursive-descent parser.
  147. (define quotation-symbols '((quote . quote)
  148. (backquote . #{`}#)
  149. (unquote . #{,}#)
  150. (unquote-splicing . #{,@}#)))
  151. (define (get-expression lex)
  152. (let* ((token (lex 'get))
  153. (type (car token))
  154. (return (lambda (result)
  155. (if (pair? result)
  156. (set-source-properties!
  157. result
  158. (source-properties token)))
  159. result)))
  160. (case type
  161. ((eof)
  162. (parse-error token "end of file during parsing"))
  163. ((integer float symbol character string)
  164. (return (cdr token)))
  165. ((function)
  166. (return `(function ,(get-expression lex))))
  167. ((quote backquote unquote unquote-splicing)
  168. (return (list (assq-ref quotation-symbols type)
  169. (get-expression lex))))
  170. ((paren-open)
  171. (return (get-list lex #t #f)))
  172. ((square-open)
  173. (return (list->vector (get-list lex #f #t))))
  174. ((circular-ref)
  175. (circular-ref token))
  176. ((circular-def)
  177. ;; The order of definitions is important!
  178. (let* ((setter (circular-define! token))
  179. (expr (get-expression lex)))
  180. (setter expr)
  181. (force-promises! expr)
  182. expr))
  183. ((set-lexical-binding-mode!)
  184. (return `(%set-lexical-binding-mode ,(cdr token))))
  185. (else
  186. (parse-error token "expected expression, got" token)))))
  187. ;;; Define the reader function based on this; build a lexer, a
  188. ;;; lexer-buffer, and then parse a single expression to return. We also
  189. ;;; define a circular-definitions data structure to use.
  190. (define (read-elisp port)
  191. (with-fluids ((circular-definitions (make-circular-definitions)))
  192. (let* ((lexer (get-lexer port))
  193. (lexbuf (make-lexer-buffer lexer))
  194. (next (lexbuf 'peek)))
  195. (if (eq? (car next) 'eof)
  196. (cdr next)
  197. (let ((result (get-expression lexbuf)))
  198. (lexbuf 'finish)
  199. result)))))