simple.scm 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. ;;;; (sxml simple) -- a simple interface to the SSAX parser
  2. ;;;;
  3. ;;;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
  4. ;;;; Modified 2004 by Andy Wingo <wingo at pobox dot com>.
  5. ;;;; Originally written by Oleg Kiselyov <oleg at pobox dot com> as SXML-to-HTML.scm.
  6. ;;;;
  7. ;;;; This library is free software; you can redistribute it and/or
  8. ;;;; modify it under the terms of the GNU Lesser General Public
  9. ;;;; License as published by the Free Software Foundation; either
  10. ;;;; version 3 of the License, or (at your option) any later version.
  11. ;;;;
  12. ;;;; This library is distributed in the hope that it will be useful,
  13. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;;;; Lesser General Public License for more details.
  16. ;;;;
  17. ;;;; You should have received a copy of the GNU Lesser General Public
  18. ;;;; License along with this library; if not, write to the Free Software
  19. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. ;;;;
  21. ;;; Commentary:
  22. ;;
  23. ;;A simple interface to XML parsing and serialization.
  24. ;;
  25. ;;; Code:
  26. (define-module (sxml simple)
  27. #:use-module (sxml ssax)
  28. #:use-module (sxml transform)
  29. #:use-module (ice-9 optargs)
  30. #:use-module (srfi srfi-13)
  31. #:export (xml->sxml sxml->xml sxml->string))
  32. (define* (xml->sxml #:optional (port (current-input-port)))
  33. "Use SSAX to parse an XML document into SXML. Takes one optional
  34. argument, @var{port}, which defaults to the current input port."
  35. (ssax:xml->sxml port '()))
  36. (define check-name
  37. (let ((*good-cache* (make-hash-table)))
  38. (lambda (name)
  39. (if (not (hashq-ref *good-cache* name))
  40. (let* ((str (symbol->string name))
  41. (i (string-index str #\:))
  42. (head (or (and i (substring str 0 i)) str))
  43. (tail (and i (substring str (1+ i)))))
  44. (and i (string-index (substring str (1+ i)) #\:)
  45. (error "Invalid QName: more than one colon" name))
  46. (for-each
  47. (lambda (s)
  48. (and s
  49. (or (char-alphabetic? (string-ref s 0))
  50. (eq? (string-ref s 0) #\_)
  51. (error "Invalid name starting character" s name))
  52. (string-for-each
  53. (lambda (c)
  54. (or (char-alphabetic? c) (string-index "0123456789.-_" c)
  55. (error "Invalid name character" c s name)))
  56. s)))
  57. (list head tail))
  58. (hashq-set! *good-cache* name #t))))))
  59. ;; The following two functions serialize tags and attributes. They are
  60. ;; being used in the node handlers for the post-order function, see
  61. ;; below.
  62. (define (attribute-value->xml value port)
  63. (cond
  64. ((pair? value)
  65. (attribute-value->xml (car value) port)
  66. (attribute-value->xml (cdr value) port))
  67. ((null? value)
  68. *unspecified*)
  69. ((string? value)
  70. (string->escaped-xml value port))
  71. ((procedure? value)
  72. (with-output-to-port port value))
  73. (else
  74. (string->escaped-xml
  75. (call-with-output-string (lambda (port) (display value port)))
  76. port))))
  77. (define (attribute->xml attr value port)
  78. (check-name attr)
  79. (display attr port)
  80. (display "=\"" port)
  81. (attribute-value->xml value port)
  82. (display #\" port))
  83. (define (element->xml tag attrs body port)
  84. (check-name tag)
  85. (display #\< port)
  86. (display tag port)
  87. (if attrs
  88. (let lp ((attrs attrs))
  89. (if (pair? attrs)
  90. (let ((attr (car attrs)))
  91. (display #\space port)
  92. (if (pair? attr)
  93. (attribute->xml (car attr) (cdr attr) port)
  94. (error "bad attribute" tag attr))
  95. (lp (cdr attrs)))
  96. (if (not (null? attrs))
  97. (error "bad attributes" tag attrs)))))
  98. (if (pair? body)
  99. (begin
  100. (display #\> port)
  101. (let lp ((body body))
  102. (cond
  103. ((pair? body)
  104. (sxml->xml (car body) port)
  105. (lp (cdr body)))
  106. ((null? body)
  107. (display "</" port)
  108. (display tag port)
  109. (display ">" port))
  110. (else
  111. (error "bad element body" tag body)))))
  112. (display " />" port)))
  113. ;; FIXME: ensure name is valid
  114. (define (entity->xml name port)
  115. (display #\& port)
  116. (display name port)
  117. (display #\; port))
  118. ;; FIXME: ensure tag and str are valid
  119. (define (pi->xml tag str port)
  120. (display "<?" port)
  121. (display tag port)
  122. (display #\space port)
  123. (display str port)
  124. (display "?>" port))
  125. (define* (sxml->xml tree #:optional (port (current-output-port)))
  126. "Serialize the sxml tree @var{tree} as XML. The output will be written
  127. to the current output port, unless the optional argument @var{port} is
  128. present."
  129. (cond
  130. ((pair? tree)
  131. (if (symbol? (car tree))
  132. ;; An element.
  133. (let ((tag (car tree)))
  134. (case tag
  135. ((*TOP*)
  136. (sxml->xml (cdr tree) port))
  137. ((*ENTITY*)
  138. (if (and (list? (cdr tree)) (= (length (cdr tree)) 1))
  139. (entity->xml (cadr tree) port)
  140. (error "bad *ENTITY* args" (cdr tree))))
  141. ((*PI*)
  142. (if (and (list? (cdr tree)) (= (length (cdr tree)) 2))
  143. (pi->xml (cadr tree) (caddr tree) port)
  144. (error "bad *PI* args" (cdr tree))))
  145. (else
  146. (let* ((elems (cdr tree))
  147. (attrs (and (pair? elems) (pair? (car elems))
  148. (eq? '@ (caar elems))
  149. (cdar elems))))
  150. (element->xml tag attrs (if attrs (cdr elems) elems) port)))))
  151. ;; A nodelist.
  152. (for-each (lambda (x) (sxml->xml x port)) tree)))
  153. ((string? tree)
  154. (string->escaped-xml tree port))
  155. ((null? tree) *unspecified*)
  156. ((not tree) *unspecified*)
  157. ((eqv? tree #t) *unspecified*)
  158. ((procedure? tree)
  159. (with-output-to-port port tree))
  160. (else
  161. (string->escaped-xml
  162. (call-with-output-string (lambda (port) (display tree port)))
  163. port))))
  164. (define (sxml->string sxml)
  165. "Detag an sxml tree @var{sxml} into a string. Does not perform any
  166. formatting."
  167. (string-concatenate-reverse
  168. (foldts
  169. (lambda (seed tree) ; fdown
  170. '())
  171. (lambda (seed kid-seed tree) ; fup
  172. (append! kid-seed seed))
  173. (lambda (seed tree) ; fhere
  174. (if (string? tree) (cons tree seed) seed))
  175. '()
  176. sxml)))
  177. (define (make-char-quotator char-encoding)
  178. (let ((bad-chars (list->char-set (map car char-encoding))))
  179. ;; Check to see if str contains one of the characters in charset,
  180. ;; from the position i onward. If so, return that character's index.
  181. ;; otherwise, return #f
  182. (define (index-cset str i charset)
  183. (string-index str charset i))
  184. ;; The body of the function
  185. (lambda (str port)
  186. (let ((bad-pos (index-cset str 0 bad-chars)))
  187. (if (not bad-pos)
  188. (display str port) ; str had all good chars
  189. (let loop ((from 0) (to bad-pos))
  190. (cond
  191. ((>= from (string-length str)) *unspecified*)
  192. ((not to)
  193. (display (substring str from (string-length str)) port))
  194. (else
  195. (let ((quoted-char
  196. (cdr (assv (string-ref str to) char-encoding)))
  197. (new-to
  198. (index-cset str (+ 1 to) bad-chars)))
  199. (if (< from to)
  200. (display (substring str from to) port))
  201. (display quoted-char port)
  202. (loop (1+ to) new-to))))))))))
  203. ;; Given a string, check to make sure it does not contain characters
  204. ;; such as '<' or '&' that require encoding. Return either the original
  205. ;; string, or a list of string fragments with special characters
  206. ;; replaced by appropriate character entities.
  207. (define string->escaped-xml
  208. (make-char-quotator
  209. '((#\< . "&lt;") (#\> . "&gt;") (#\& . "&amp;") (#\" . "&quot;"))))
  210. ;;; arch-tag: 9c853b25-d82f-42ef-a959-ae26fdc7d1ac
  211. ;;; simple.scm ends here