os-string.scm 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. ; Part of Scheme 48 1.9. See file COPYING for notices and license.
  2. ; Authors: Mike Sperber, Robert Ransom
  3. ; You may think that file names / environment variables / user names
  4. ; etc. are just text, but on most platforms, that assumption is wrong:
  5. ; They are usually NUL-terminated byte strings in some format. The
  6. ; bytes are invariant, but the corresponding text may depend on the
  7. ; locale. Also, byte sequences without a textual representation are
  8. ; possible.
  9. ; We assume that OS strings are encoded in some conservative extension
  10. ; of NUL-terminated ASCII. On Unix, this assumption pretty much has
  11. ; to hold true because of the various constraints of locale handling
  12. ; there. The Windows API uses an extension of UTF-16 that includes
  13. ; unpaired surrogates. For this, we use a synthetic extension of
  14. ; UTF-8 called UTF-8of16 that also deals with unpaired surrogates.
  15. ; #### lossiness
  16. (define-record-type os-string :os-string
  17. (really-make-os-string text-codec string byte-vector)
  18. os-string?
  19. (text-codec os-string-text-codec)
  20. ; may be #f, will get cached value
  21. (string os-string-string set-os-string-string!)
  22. ; may be #f, will get cached value
  23. (byte-vector os-string-byte-vector set-os-string-byte-vector!))
  24. (define-record-discloser :os-string
  25. (lambda (oss)
  26. (list "OS-string"
  27. (text-codec-names (os-string-text-codec oss))
  28. (os-string->string oss))))
  29. (define *initial-os-string-text-codec* #f)
  30. (define (initialize-os-string-text-codec!)
  31. (set! *initial-os-string-text-codec*
  32. (or (find-text-codec
  33. (system-parameter (enum system-parameter-option os-string-encoding)))
  34. us-ascii-codec)))
  35. (define $os-string-text-codec
  36. (make-fluid
  37. (lambda () *initial-os-string-text-codec*)))
  38. (define (current-os-string-text-codec)
  39. ((fluid $os-string-text-codec)))
  40. (define (call-with-os-string-text-codec codec thunk)
  41. (let-fluid $os-string-text-codec (lambda () codec)
  42. thunk))
  43. (define (make-os-string codec thing)
  44. (call-with-values
  45. (lambda ()
  46. (cond
  47. ((string? thing)
  48. (values (make-immutable! thing) #f))
  49. ((byte-vector? thing)
  50. (values #f (make-immutable! (byte-vector-copy-z thing))))
  51. (else
  52. (assertion-violation 'make-os-string "invalid argument" thing))))
  53. (lambda (str bv)
  54. (really-make-os-string codec str bv))))
  55. (define (string->os-string s)
  56. (let ((c (string-copy s)))
  57. (make-immutable! c)
  58. (really-make-os-string (current-os-string-text-codec)
  59. c #f)))
  60. (define (byte-vector->os-string b)
  61. (let ((c (byte-vector-copy-z b)))
  62. (make-immutable! b)
  63. (really-make-os-string (current-os-string-text-codec)
  64. #f c)))
  65. (define (os-string->byte-vector oss)
  66. (or (os-string-byte-vector oss)
  67. (let* ((string (os-string-string oss))
  68. (codec (os-string-text-codec oss))
  69. (size (string-encoding-length codec
  70. string
  71. 0
  72. (string-length string)))
  73. (bytes (make-byte-vector (+ size 1) 0))) ; NUL termination
  74. (encode-string codec
  75. string 0 (string-length string)
  76. bytes 0 size)
  77. (set-os-string-byte-vector! oss bytes)
  78. (make-immutable! bytes)
  79. bytes)))
  80. (define (os-string->string oss)
  81. (or (os-string-string oss)
  82. (let* ((bytes (os-string-byte-vector oss))
  83. (size (- (byte-vector-length bytes) 1))
  84. (codec (os-string-text-codec oss)))
  85. (call-with-values
  86. (lambda ()
  87. (bytes-string-size codec bytes 0 size #f))
  88. (lambda (status consumed-count decoded-count)
  89. (let ((string (make-string decoded-count)))
  90. (decode-string codec bytes 0 size
  91. string 0 decoded-count
  92. #\?)
  93. (set-os-string-string! oss string)
  94. (make-immutable! string)
  95. string))))))
  96. (define (x->os-string x)
  97. (cond
  98. ((os-string? x) x)
  99. ((string? x) (string->os-string x))
  100. ((byte-vector? x) (byte-vector->os-string x))))
  101. (define (os-string=? os1 os2)
  102. (byte-vector=? (os-string->byte-vector os1) (os-string->byte-vector os2)))
  103. ; frequent idioms
  104. (define (string->os-byte-vector s)
  105. (os-string->byte-vector (string->os-string s)))
  106. (define (x->os-byte-vector x)
  107. (os-string->byte-vector (x->os-string x)))
  108. ; Utilities
  109. (define (byte-vector-copy-z b)
  110. (let* ((size-old (byte-vector-length b))
  111. (nul? (and (positive? size-old)
  112. (zero? (byte-vector-ref b (- size-old 1)))))
  113. (size (if nul? size-old (+ 1 size-old)))
  114. (result (make-byte-vector size 0)))
  115. (copy-bytes! b 0 result 0 size-old)
  116. result))
  117. ; Initialization
  118. (initialize-os-string-text-codec!)