text-codec.scm 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. ; Part of Scheme 48 1.9. See file COPYING for notices and license.
  2. ; Authors: Mike Sperber
  3. ; Encoders/decoders from text to bytes and vice versa, for use by the
  4. ; the ports subsystem.
  5. ; Note that encoders and decoders must operate on buffers only
  6. ; provisionally.
  7. (define-record-type text-codec :text-codec
  8. (really-make-text-codec names
  9. builtin-code
  10. encode-char-proc
  11. decode-char-proc)
  12. text-codec?
  13. (names text-codec-names)
  14. ;; either #f or an integer from enum TEXT-ENCODING-OPTION
  15. ;; for encodings built into the VM
  16. (builtin-code text-codec-builtin-code)
  17. ;; (char buffer start count) -> (ok? #f or #bytes consumed or #bytes needed)
  18. (encode-char-proc text-codec-encode-char-proc)
  19. ;; (buffer start count) -> (char #bytes consumed)
  20. ;; or (#f #total bytes needed at least)
  21. ;; or (#f #f) (failure)
  22. (decode-char-proc text-codec-decode-char-proc))
  23. (define (make-builtin-text-codec names code)
  24. (really-make-text-codec names
  25. code
  26. (lambda (char buffer start count)
  27. (char->utf code char buffer start count))
  28. (lambda (buffer start count)
  29. (utf->char code buffer start count))))
  30. (define (make-text-codec names encode-char-proc decode-char-proc)
  31. (really-make-text-codec names #f encode-char-proc decode-char-proc))
  32. (define-record-discloser :text-codec
  33. (lambda (r)
  34. (cons 'text-codec (text-codec-names r))))
  35. (define *builtin-text-codecs*
  36. (make-vector (+ (max (enum text-encoding-option us-ascii)
  37. (enum text-encoding-option utf-8)
  38. (enum text-encoding-option utf-16le)
  39. (enum text-encoding-option utf-16be)
  40. (enum text-encoding-option utf-32le)
  41. (enum text-encoding-option utf-32be))
  42. 1)))
  43. (define (spec->text-codec spec)
  44. (if (text-codec? spec)
  45. spec
  46. (vector-ref *builtin-text-codecs* spec)))
  47. (define (text-codec->spec codec)
  48. (or (text-codec-builtin-code codec)
  49. codec))
  50. (define *text-codecs* '())
  51. (define (register-text-codec! codec)
  52. (set! *text-codecs* (cons codec *text-codecs*)))
  53. (define (find-text-codec name)
  54. (let loop ((codecs *text-codecs*))
  55. (cond
  56. ((null? codecs) #f)
  57. ((member name (text-codec-names (car codecs)))
  58. (car codecs))
  59. (else (loop (cdr codecs))))))
  60. (define-syntax define-text-codec
  61. (syntax-rules ()
  62. ((define-text-codec ?id (?name ...) ?encode-proc ?decode-proc)
  63. (begin
  64. (define ?id (make-text-codec '(?name ...) ?encode-proc ?decode-proc))
  65. (register-text-codec! ?id)))
  66. ((define-text-codec ?id ?name ?encode-proc ?decode-proc)
  67. (define-text-codec ?id (?name) ?encode-proc ?decode-proc))))
  68. (define-syntax define-builtin-text-codec
  69. (syntax-rules ()
  70. ((define-builtin-text-codec ?id (?name ...) ?enumerand)
  71. (begin
  72. (define ?id (make-builtin-text-codec '(?name ...) (enum text-encoding-option ?enumerand)))
  73. (register-text-codec! ?id)
  74. (vector-set! *builtin-text-codecs* (enum text-encoding-option ?enumerand)
  75. ?id)))
  76. ((define-builtin-text-codec ?id ?name ?enumerand)
  77. (define-builtin-text-codec ?id (?name) ?enumerand))))
  78. (define-text-codec null-text-codec "null"
  79. (lambda (char buffer start count)
  80. #f)
  81. (lambda (buffer start count)
  82. (values #f #f)))
  83. (define-builtin-text-codec us-ascii-codec
  84. ("US-ASCII"
  85. "ANSI_X3.4-1968" ; apparently, the POSIX locale on some Linux systems returns this
  86. )
  87. us-ascii)
  88. (define-builtin-text-codec latin-1-codec "ISO8859-1" latin-1)
  89. (define-builtin-text-codec utf-8-codec "UTF-8" utf-8)
  90. (define-builtin-text-codec utf-16le-codec "UTF-16LE" utf-16le)
  91. (define-builtin-text-codec utf-16be-codec "UTF-16BE" utf-16be)
  92. (define-builtin-text-codec utf-32le-codec "UTF-32LE" utf-32le)
  93. (define-builtin-text-codec utf-32be-codec "UTF-32BE" utf-32be)