codegen.scm 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. ;;;; codegen.scm --- code generation for composable parsers
  2. ;;;;
  3. ;;;; Copyright (C) 2011 Free Software Foundation, Inc.
  4. ;;;;
  5. ;;;; This library is free software; you can redistribute it and/or
  6. ;;;; modify it under the terms of the GNU Lesser General Public
  7. ;;;; License as published by the Free Software Foundation; either
  8. ;;;; version 3 of the License, or (at your option) any later version.
  9. ;;;;
  10. ;;;; This library is distributed in the hope that it will be useful,
  11. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. ;;;; Lesser General Public License for more details.
  14. ;;;;
  15. ;;;; You should have received a copy of the GNU Lesser General Public
  16. ;;;; License along with this library; if not, write to the Free Software
  17. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. ;;;;
  19. (define-module (ice-9 peg codegen)
  20. #:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!)
  21. #:use-module (ice-9 pretty-print)
  22. #:use-module (system base pmatch))
  23. (define-syntax single?
  24. (syntax-rules ()
  25. "Return #t if X is a list of one element."
  26. ((_ x)
  27. (pmatch x
  28. ((_) #t)
  29. (else #f)))))
  30. (define-syntax single-filter
  31. (syntax-rules ()
  32. "If EXP is a list of one element, return the element. Otherwise
  33. return EXP."
  34. ((_ exp)
  35. (pmatch exp
  36. ((,elt) elt)
  37. (,elts elts)))))
  38. (define-syntax push-not-null!
  39. (syntax-rules ()
  40. "If OBJ is non-null, push it onto LST, otherwise do nothing."
  41. ((_ lst obj)
  42. (if (not (null? obj))
  43. (push! lst obj)))))
  44. (define-syntax push!
  45. (syntax-rules ()
  46. "Push an object onto a list."
  47. ((_ lst obj)
  48. (set! lst (cons obj lst)))))
  49. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  50. ;;;;; CODE GENERATORS
  51. ;; These functions generate scheme code for parsing PEGs.
  52. ;; Conventions:
  53. ;; accum: (all name body none)
  54. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  55. ;; Code we generate will have a certain return structure depending on how we're
  56. ;; accumulating (the ACCUM variable).
  57. (define (cg-generic-ret accum name body-uneval at)
  58. ;; name, body-uneval and at are syntax
  59. #`(let ((body #,body-uneval))
  60. #,(cond
  61. ((and (eq? accum 'all) name)
  62. #`(list #,at
  63. (cond
  64. ((not (list? body)) (list '#,name body))
  65. ((null? body) '#,name)
  66. ((symbol? (car body)) (list '#,name body))
  67. (else (cons '#,name body)))))
  68. ((eq? accum 'name)
  69. #`(list #,at '#,name))
  70. ((eq? accum 'body)
  71. #`(list #,at
  72. (cond
  73. ((single? body) (car body))
  74. (else body))))
  75. ((eq? accum 'none)
  76. #`(list #,at '()))
  77. (else
  78. (begin
  79. (pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
  80. (pretty-print "Defaulting to accum of none.\n")
  81. #`(list #,at '()))))))
  82. ;; The short name makes the formatting below much easier to read.
  83. (define cggr cg-generic-ret)
  84. ;; Generates code that matches a particular string.
  85. ;; E.g.: (cg-string syntax "abc" 'body)
  86. (define (cg-string pat accum)
  87. (let ((plen (string-length pat)))
  88. #`(lambda (str len pos)
  89. (let ((end (+ pos #,plen)))
  90. (and (<= end len)
  91. (string= str #,pat pos end)
  92. #,(case accum
  93. ((all) #`(list end (list 'cg-string #,pat)))
  94. ((name) #`(list end 'cg-string))
  95. ((body) #`(list end #,pat))
  96. ((none) #`(list end '()))
  97. (else (error "bad accum" accum))))))))
  98. ;; Generates code for matching any character.
  99. ;; E.g.: (cg-peg-any syntax 'body)
  100. (define (cg-peg-any accum)
  101. #`(lambda (str len pos)
  102. (and (< pos len)
  103. #,(case accum
  104. ((all) #`(list (1+ pos)
  105. (list 'cg-peg-any (substring str pos (1+ pos)))))
  106. ((name) #`(list (1+ pos) 'cg-peg-any))
  107. ((body) #`(list (1+ pos) (substring str pos (1+ pos))))
  108. ((none) #`(list (1+ pos) '()))
  109. (else (error "bad accum" accum))))))
  110. ;; Generates code for matching a range of characters between start and end.
  111. ;; E.g.: (cg-range syntax #\a #\z 'body)
  112. (define (cg-range pat accum)
  113. (syntax-case pat ()
  114. ((start end)
  115. (if (not (and (char? (syntax->datum #'start))
  116. (char? (syntax->datum #'end))))
  117. (error "range PEG should have characters after it; instead got"
  118. #'start #'end))
  119. #`(lambda (str len pos)
  120. (and (< pos len)
  121. (let ((c (string-ref str pos)))
  122. (and (char>=? c start)
  123. (char<=? c end)
  124. #,(case accum
  125. ((all) #`(list (1+ pos) (list 'cg-range (string c))))
  126. ((name) #`(list (1+ pos) 'cg-range))
  127. ((body) #`(list (1+ pos) (string c)))
  128. ((none) #`(list (1+ pos) '()))
  129. (else (error "bad accum" accum))))))))))
  130. ;; Generate code to match a pattern and do nothing with the result
  131. (define (cg-ignore pat accum)
  132. (syntax-case pat ()
  133. ((inner)
  134. (compile-peg-pattern #'inner 'none))))
  135. (define (cg-capture pat accum)
  136. (syntax-case pat ()
  137. ((inner)
  138. (compile-peg-pattern #'inner 'body))))
  139. ;; Filters the accum argument to compile-peg-pattern for buildings like string
  140. ;; literals (since we don't want to tag them with their name if we're doing an
  141. ;; "all" accum).
  142. (define (builtin-accum-filter accum)
  143. (cond
  144. ((eq? accum 'all) 'body)
  145. ((eq? accum 'name) 'name)
  146. ((eq? accum 'body) 'body)
  147. ((eq? accum 'none) 'none)))
  148. (define baf builtin-accum-filter)
  149. ;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
  150. (define (cg-and clauses accum)
  151. #`(lambda (str len pos)
  152. (let ((body '()))
  153. #,(cg-and-int clauses (baf accum) #'str #'len #'pos #'body))))
  154. ;; Internal function builder for AND (calls itself).
  155. (define (cg-and-int clauses accum str strlen at body)
  156. (syntax-case clauses ()
  157. (()
  158. (cggr accum 'cg-and #`(reverse #,body) at))
  159. ((first rest ...)
  160. #`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)))
  161. (and res
  162. ;; update AT and BODY then recurse
  163. (let ((newat (car res))
  164. (newbody (cadr res)))
  165. (set! #,at newat)
  166. (push-not-null! #,body (single-filter newbody))
  167. #,(cg-and-int #'(rest ...) accum str strlen at body)))))))
  168. ;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
  169. (define (cg-or clauses accum)
  170. #`(lambda (str len pos)
  171. #,(cg-or-int clauses (baf accum) #'str #'len #'pos)))
  172. ;; Internal function builder for OR (calls itself).
  173. (define (cg-or-int clauses accum str strlen at)
  174. (syntax-case clauses ()
  175. (()
  176. #f)
  177. ((first rest ...)
  178. #`(or (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)
  179. #,(cg-or-int #'(rest ...) accum str strlen at)))))
  180. (define (cg-* args accum)
  181. (syntax-case args ()
  182. ((pat)
  183. #`(lambda (str strlen at)
  184. (let ((body '()))
  185. (let lp ((end at) (count 0))
  186. (let* ((match (#,(compile-peg-pattern #'pat (baf accum))
  187. str strlen end))
  188. (new-end (if match (car match) end))
  189. (count (if (> new-end end) (1+ count) count)))
  190. (if (> new-end end)
  191. (push-not-null! body (single-filter (cadr match))))
  192. (if (and (> new-end end)
  193. #,#t)
  194. (lp new-end count)
  195. (let ((success #,#t))
  196. #,#`(and success
  197. #,(cggr (baf accum) 'cg-body
  198. #'(reverse body) #'new-end)))))))))))
  199. (define (cg-+ args accum)
  200. (syntax-case args ()
  201. ((pat)
  202. #`(lambda (str strlen at)
  203. (let ((body '()))
  204. (let lp ((end at) (count 0))
  205. (let* ((match (#,(compile-peg-pattern #'pat (baf accum))
  206. str strlen end))
  207. (new-end (if match (car match) end))
  208. (count (if (> new-end end) (1+ count) count)))
  209. (if (> new-end end)
  210. (push-not-null! body (single-filter (cadr match))))
  211. (if (and (> new-end end)
  212. #,#t)
  213. (lp new-end count)
  214. (let ((success #,#'(>= count 1)))
  215. #,#`(and success
  216. #,(cggr (baf accum) 'cg-body
  217. #'(reverse body) #'new-end)))))))))))
  218. (define (cg-? args accum)
  219. (syntax-case args ()
  220. ((pat)
  221. #`(lambda (str strlen at)
  222. (let ((body '()))
  223. (let lp ((end at) (count 0))
  224. (let* ((match (#,(compile-peg-pattern #'pat (baf accum))
  225. str strlen end))
  226. (new-end (if match (car match) end))
  227. (count (if (> new-end end) (1+ count) count)))
  228. (if (> new-end end)
  229. (push-not-null! body (single-filter (cadr match))))
  230. (if (and (> new-end end)
  231. #,#'(< count 1))
  232. (lp new-end count)
  233. (let ((success #,#t))
  234. #,#`(and success
  235. #,(cggr (baf accum) 'cg-body
  236. #'(reverse body) #'new-end)))))))))))
  237. (define (cg-followed-by args accum)
  238. (syntax-case args ()
  239. ((pat)
  240. #`(lambda (str strlen at)
  241. (let ((body '()))
  242. (let lp ((end at) (count 0))
  243. (let* ((match (#,(compile-peg-pattern #'pat (baf accum))
  244. str strlen end))
  245. (new-end (if match (car match) end))
  246. (count (if (> new-end end) (1+ count) count)))
  247. (if (> new-end end)
  248. (push-not-null! body (single-filter (cadr match))))
  249. (if (and (> new-end end)
  250. #,#'(< count 1))
  251. (lp new-end count)
  252. (let ((success #,#'(= count 1)))
  253. #,#`(and success
  254. #,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
  255. (define (cg-not-followed-by args accum)
  256. (syntax-case args ()
  257. ((pat)
  258. #`(lambda (str strlen at)
  259. (let ((body '()))
  260. (let lp ((end at) (count 0))
  261. (let* ((match (#,(compile-peg-pattern #'pat (baf accum))
  262. str strlen end))
  263. (new-end (if match (car match) end))
  264. (count (if (> new-end end) (1+ count) count)))
  265. (if (> new-end end)
  266. (push-not-null! body (single-filter (cadr match))))
  267. (if (and (> new-end end)
  268. #,#'(< count 1))
  269. (lp new-end count)
  270. (let ((success #,#'(= count 1)))
  271. #,#`(if success
  272. #f
  273. #,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
  274. ;; Association list of functions to handle different expressions as PEGs
  275. (define peg-compiler-alist '())
  276. (define (add-peg-compiler! symbol function)
  277. (set! peg-compiler-alist
  278. (assq-set! peg-compiler-alist symbol function)))
  279. (add-peg-compiler! 'range cg-range)
  280. (add-peg-compiler! 'ignore cg-ignore)
  281. (add-peg-compiler! 'capture cg-capture)
  282. (add-peg-compiler! 'and cg-and)
  283. (add-peg-compiler! 'or cg-or)
  284. (add-peg-compiler! '* cg-*)
  285. (add-peg-compiler! '+ cg-+)
  286. (add-peg-compiler! '? cg-?)
  287. (add-peg-compiler! 'followed-by cg-followed-by)
  288. (add-peg-compiler! 'not-followed-by cg-not-followed-by)
  289. ;; Takes an arbitrary expressions and accumulation variable, then parses it.
  290. ;; E.g.: (compile-peg-pattern syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
  291. (define (compile-peg-pattern pat accum)
  292. (syntax-case pat (peg-any)
  293. (peg-any
  294. (cg-peg-any (baf accum)))
  295. (sym (identifier? #'sym) ;; nonterminal
  296. #'sym)
  297. (str (string? (syntax->datum #'str)) ;; literal string
  298. (cg-string (syntax->datum #'str) (baf accum)))
  299. ((name . args) (let* ((nm (syntax->datum #'name))
  300. (entry (assq-ref peg-compiler-alist nm)))
  301. (if entry
  302. (entry #'args accum)
  303. (error "Bad peg form" nm #'args
  304. "Not one of" (map car peg-compiler-alist)))))))
  305. ;; Packages the results of a parser
  306. (define (wrap-parser-for-users for-syntax parser accumsym s-syn)
  307. #`(lambda (str strlen at)
  308. (let ((res (#,parser str strlen at)))
  309. ;; Try to match the nonterminal.
  310. (if res
  311. ;; If we matched, do some post-processing to figure out
  312. ;; what data to propagate upward.
  313. (let ((at (car res))
  314. (body (cadr res)))
  315. #,(cond
  316. ((eq? accumsym 'name)
  317. #`(list at '#,s-syn))
  318. ((eq? accumsym 'all)
  319. #`(list (car res)
  320. (cond
  321. ((not (list? body))
  322. (list '#,s-syn body))
  323. ((null? body) '#,s-syn)
  324. ((symbol? (car body))
  325. (list '#,s-syn body))
  326. (else (cons '#,s-syn body)))))
  327. ((eq? accumsym 'none) #`(list (car res) '()))
  328. (else #`(begin res))))
  329. ;; If we didn't match, just return false.
  330. #f))))