iconv.scm 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. ;;; Encoding and decoding byte representations of strings
  2. ;; Copyright (C) 2013 Free Software Foundation, Inc.
  3. ;;;; This library is free software; you can redistribute it and/or
  4. ;;;; modify it under the terms of the GNU Lesser General Public
  5. ;;;; License as published by the Free Software Foundation; either
  6. ;;;; version 3 of the License, or (at your option) any later version.
  7. ;;;;
  8. ;;;; This library is distributed in the hope that it will be useful,
  9. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. ;;;; Lesser General Public License for more details.
  12. ;;;;
  13. ;;;; You should have received a copy of the GNU Lesser General Public
  14. ;;;; License along with this library; if not, write to the Free Software
  15. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  16. ;;; Code:
  17. (define-module (ice-9 iconv)
  18. #:use-module (rnrs bytevectors)
  19. #:use-module (ice-9 binary-ports)
  20. #:use-module ((ice-9 rdelim) #:select (read-string))
  21. #:export (string->bytevector
  22. bytevector->string
  23. call-with-encoded-output-string))
  24. ;; Like call-with-output-string, but actually closes the port.
  25. (define (call-with-output-string* proc)
  26. (let ((port (open-output-string)))
  27. (proc port)
  28. (let ((str (get-output-string port)))
  29. (close-port port)
  30. str)))
  31. (define (call-with-output-bytevector* proc)
  32. (call-with-values (lambda () (open-bytevector-output-port))
  33. (lambda (port get-bytevector)
  34. (proc port)
  35. (let ((bv (get-bytevector)))
  36. (close-port port)
  37. bv))))
  38. (define* (call-with-encoded-output-string encoding proc
  39. #:optional
  40. (conversion-strategy 'error))
  41. "Call PROC on a fresh port. Encode the resulting string as a
  42. bytevector according to ENCODING, and return the bytevector."
  43. (if (and (string-ci=? encoding "utf-8")
  44. (eq? conversion-strategy 'error))
  45. ;; I don't know why, but this appears to be faster; at least for
  46. ;; serving examples/debug-sxml.scm (1464 reqs/s versus 850
  47. ;; reqs/s).
  48. (string->utf8 (call-with-output-string* proc))
  49. (call-with-output-bytevector*
  50. (lambda (port)
  51. (set-port-encoding! port encoding)
  52. (if conversion-strategy
  53. (set-port-conversion-strategy! port conversion-strategy))
  54. (proc port)))))
  55. ;; TODO: Provide C implementations that call scm_from_stringn and
  56. ;; friends?
  57. (define* (string->bytevector str encoding
  58. #:optional (conversion-strategy 'error))
  59. "Encode STRING according to ENCODING, which should be a string naming
  60. a character encoding, like \"utf-8\"."
  61. (if (and (string-ci=? encoding "utf-8")
  62. (eq? conversion-strategy 'error))
  63. (string->utf8 str)
  64. (call-with-encoded-output-string
  65. encoding
  66. (lambda (port)
  67. (display str port))
  68. conversion-strategy)))
  69. (define* (bytevector->string bv encoding
  70. #:optional (conversion-strategy 'error))
  71. "Decode the string represented by BV. The bytes in the bytevector
  72. will be interpreted according to ENCODING, which should be a string
  73. naming a character encoding, like \"utf-8\"."
  74. (if (and (string-ci=? encoding "utf-8")
  75. (eq? conversion-strategy 'error))
  76. (utf8->string bv)
  77. (let ((p (open-bytevector-input-port bv)))
  78. (set-port-encoding! p encoding)
  79. (if conversion-strategy
  80. (set-port-conversion-strategy! p conversion-strategy))
  81. (let ((res (read-string p)))
  82. (close-port p)
  83. (if (eof-object? res)
  84. ""
  85. res)))))