binary-string.scm 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160
  1. ;;; Disarchive
  2. ;;; Copyright © 2020 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2020, 2021 Timothy Sample <samplet@ngyro.com>
  4. ;;;
  5. ;;; This file is part of Disarchive.
  6. ;;;
  7. ;;; Disarchive is free software: you can redistribute it and/or modify
  8. ;;; it under the terms of the GNU General Public License as published by
  9. ;;; the Free Software Foundation, either version 3 of the License, or
  10. ;;; (at your option) any later version.
  11. ;;;
  12. ;;; Disarchive is distributed in the hope that it will be useful,
  13. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;;; GNU General Public License for more details.
  16. ;;;
  17. ;;; You should have received a copy of the GNU General Public License
  18. ;;; along with Disarchive. If not, see <http://www.gnu.org/licenses/>.
  19. (define-module (disarchive kinds binary-string)
  20. #:use-module (disarchive serialization)
  21. #:use-module (disarchive utils)
  22. #:use-module (gcrypt base64)
  23. #:use-module (ice-9 match)
  24. #:use-module (rnrs bytevectors)
  25. #:use-module (srfi srfi-1)
  26. #:export (binary-string?
  27. valid-binary-string?
  28. no-null-binary-string?
  29. decode-binary-string
  30. encode-binary-string
  31. -binary-string-
  32. binary-string-length
  33. binary-string-append))
  34. ;;; Commentary:
  35. ;;;
  36. ;;; A binary string is a representation of a sequence of bytes that is
  37. ;;; opportunistically decoded as UTF-8. What this means is that any
  38. ;;; sequence of bytes that is valid UTF-8 will treated as UTF-8 (even
  39. ;;; if it isn't). However, a sequence of bytes that is not valid
  40. ;;; UTF-8 will be preserved as a bytevector.
  41. ;;;
  42. ;;; Code:
  43. (define (binary-string? obj)
  44. "Check if OBJ is a \"binary string\" (either a string or a
  45. bytevector)."
  46. (or (string? obj) (bytevector? obj)))
  47. (define (valid-binary-string? str)
  48. "Check that STR satisfies the constraints of a binary string."
  49. ;; In the case that STR is a bytevector, we must check that it is
  50. ;; not valid UTF-8. Otherwise, it should be a string.
  51. (define (utf8? bv) (false-if-exception (utf8->string bv)))
  52. (match str
  53. ((or (? string?)
  54. (and (? bytevector?)
  55. (? (negate utf8?))))
  56. #t)
  57. (_ #f)))
  58. (define (no-null-binary-string? str)
  59. "Check that STR does not contain any nulls ('#\nul' for strings and
  60. '0' for bytevectors)."
  61. (match str
  62. ((? string?) (not (string-any #\nul str)))
  63. ((? bytevector?) (let loop ((k 0))
  64. (if (>= k (bytevector-length str))
  65. #t
  66. (if (zero? (bytevector-u8-ref str k))
  67. #f
  68. (loop (1+ k))))))
  69. (_ (scm-error 'wrong-type-arg 'no-null-binary-string
  70. (string-append "Wrong type argument in position 1 "
  71. "(expecting binary-string): ~A")
  72. (list str) (list str)))))
  73. (define decode-binary-string
  74. (case-lambda
  75. "Decode the contents of the bytevector BV as a binary string.
  76. Optionally, START and END indexes can be provided to decode only a
  77. part of BV."
  78. ((bv) (or (false-if-exception (utf8->string bv)) bv))
  79. ((bv start) (decode-binary-string bv start (bytevector-length bv)))
  80. ((bv start end) (decode-binary-string (sub-bytevector bv start end)))))
  81. (define* encode-binary-string
  82. (case-lambda
  83. "Encode the binary string STR. If BV is set, the result will be
  84. written into BV. Otherwise, the result will be written into a new
  85. bytevector. If you are providing a bytevector, you can also provide
  86. START and END indexes to control where the result is written."
  87. ((str)
  88. (match str
  89. ((? string?) (string->utf8 str))
  90. ((? bytevector?) str)
  91. (_ (scm-error 'wrong-type-arg 'encode-binary-string
  92. (string-append "Wrong type argument in position 1 "
  93. "(expecting binary-string): ~A")
  94. (list str) (list str)))))
  95. ((str bv)
  96. (encode-binary-string str bv 0 (bytevector-length bv)))
  97. ((str bv start)
  98. (encode-binary-string str bv start (bytevector-length bv)))
  99. ((str bv start end)
  100. (let* ((bstr (encode-binary-string str))
  101. (bstr-len (bytevector-length bstr))
  102. (space (- end start))
  103. (leftover-space (- space bstr-len)))
  104. (bytevector-copy! bstr 0 bv start (min bstr-len (- end start)))
  105. (when (positive? leftover-space)
  106. (bytevector-fill!* bv 0 end leftover-space))))))
  107. (define (binary-string->sexp str)
  108. (match str
  109. ((? string?) str)
  110. ((? bytevector?) `(%base64 ,(base64-encode str)))
  111. (_ (scm-error 'wrong-type-arg 'binary-string->sexp
  112. (string-append "Wrong type argument in position 1 "
  113. "(expecting binary-string): ~A")
  114. (list str) (list str)))))
  115. (define (sexp->binary-string obj)
  116. (match obj
  117. ((? string?) obj)
  118. (('%base64 (? string? str)) (base64-decode str))
  119. (_ (scm-error 'misc-error 'sexp->binary-string
  120. (string-append "Invalid binary string S-exp: ~A")
  121. (list obj) (list obj)))))
  122. (define -binary-string-
  123. (make-serializer
  124. (lambda (str _) (list (binary-string->sexp str)))
  125. (lambda (obj _) (sexp->binary-string (car obj)))))
  126. (define (binary-string-length str)
  127. "Return the length (in bytes) of the binary representation of STR."
  128. (match str
  129. ((? string?) (string-utf8-length str))
  130. ((? bytevector?) (bytevector-length str))
  131. (_ (scm-error 'wrong-type-arg 'binary-string-length
  132. (string-append "Wrong type argument in position 1 "
  133. "(expecting binary-string): ~A")
  134. (list str) (list str)))))
  135. (define (binary-string-append . strs)
  136. (if (every string? strs)
  137. (string-concatenate strs)
  138. (let* ((len (reduce + 0 (map binary-string-length strs)))
  139. (result (make-bytevector len)))
  140. (let loop ((strs strs) (k 0))
  141. (match strs
  142. (() result)
  143. (((? string? str) . rest)
  144. (loop (cons (string->utf8 str) rest) k))
  145. (((? bytevector? bv) . rest)
  146. (bytevector-copy! bv 0 result k (bytevector-length bv))
  147. (loop rest (+ k (bytevector-length bv)))))))))