strings.scm 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. ;;; Strings
  2. ;;; Copyright (C) 2024 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; Strings.
  18. ;;;
  19. ;;; Code:
  20. (library (hoot strings)
  21. (export string?
  22. mutable-string?
  23. string-length
  24. string-ref
  25. string-set!
  26. string
  27. make-string
  28. string-append
  29. string-copy
  30. substring
  31. string-copy!
  32. string-fill!
  33. string-for-each
  34. string-map
  35. string<?
  36. string<=?
  37. string=?
  38. string>=?
  39. string>?
  40. list->string
  41. string->list
  42. string-utf8-length
  43. string->utf8
  44. utf8->string)
  45. (import (hoot primitives)
  46. (hoot pairs)
  47. (hoot eq)
  48. (hoot char)
  49. (hoot lists)
  50. (hoot errors)
  51. (hoot numbers)
  52. (hoot bitwise)
  53. (hoot bytevectors)
  54. (hoot match))
  55. ;; R7RS strings
  56. (define (string? x) (%string? x))
  57. (define (mutable-string? x)
  58. (%inline-wasm '(func (param $obj (ref eq))
  59. (result (ref eq))
  60. (if (ref eq)
  61. (ref.test $mutable-string (local.get $obj))
  62. (then (ref.i31 (i32.const 17)))
  63. (else (ref.i31 (i32.const 1)))))
  64. x))
  65. (define (string-length x) (%string-length x))
  66. (define (string-ref x i) (%string-ref x i))
  67. (define (%mutable-string-set-str! x x*)
  68. (check-type x mutable-string? '%mutable-string-set-str!)
  69. (check-type x* string? '%mutable-string-set-str!)
  70. (%inline-wasm '(func (param $s (ref $mutable-string))
  71. (param $new-s (ref $string))
  72. (struct.set $mutable-string
  73. $str
  74. (local.get $s)
  75. (struct.get $string
  76. $str
  77. (local.get $new-s))))
  78. x x*)
  79. (if #f #f))
  80. (define (string-set! x i v)
  81. (check-type x mutable-string? 'string-set!)
  82. (check-range i 0 (1- (string-length x)) 'string-set!)
  83. (check-type v char? 'string-set!)
  84. (let ((x* (string-append (string-copy x 0 i)
  85. (string v)
  86. (string-copy x (1+ i) (string-length x)))))
  87. (%mutable-string-set-str! x x*)))
  88. (define (string . chars) (list->string chars))
  89. (define* (make-string n #:optional (init #\space))
  90. (check-type init char? 'make-string)
  91. (let lp ((n n) (chars '()))
  92. (if (zero? n)
  93. (list->string chars)
  94. (lp (1- n) (cons init chars)))))
  95. (define (string-append . strs)
  96. (utf8->string (bytevector-concatenate (map string->utf8 strs))))
  97. (define* (string-copy str #:optional (start 0) (end (string-length str)))
  98. (check-type str string? 'string-copy)
  99. (check-range start 0 (string-length str) 'string-copy)
  100. (check-range end start (string-length str) 'string-copy)
  101. (%inline-wasm
  102. '(func (param $str (ref string))
  103. (param $start i32)
  104. (param $end i32)
  105. (result (ref eq))
  106. (local $str_iter (ref stringview_iter))
  107. (local.set $str_iter (string.as_iter (local.get $str)))
  108. (drop
  109. (stringview_iter.advance (local.get $str_iter) (local.get $start)))
  110. (struct.new $mutable-string
  111. (i32.const 0)
  112. (stringview_iter.slice (local.get $str_iter)
  113. (i32.sub (local.get $end)
  114. (local.get $start)))))
  115. str start end))
  116. (define (substring str start end)
  117. (string-copy str start end))
  118. (define* (string-copy! to at from #:optional (start 0) (end (string-length from)))
  119. (check-type to mutable-string? 'string-copy!)
  120. (check-range at 0 (string-length to) 'string-copy!)
  121. (check-type from string? 'string-copy!)
  122. (assert (<= (- end start) (- (string-length to) at)) 'string-copy!)
  123. (let ((to* (string-append (string-copy to 0 at)
  124. (string-copy from start end)
  125. (string-copy to (+ at (- end start))))))
  126. (%mutable-string-set-str! to to*)))
  127. (define* (string-fill! string fill
  128. #:optional (start 0) (end (string-length string)))
  129. (check-type string mutable-string? 'string-fill!)
  130. (check-type fill char? 'string-fill!)
  131. (check-range start 0 (string-length string) 'string-fill!)
  132. (check-range end start (string-length string) 'string-fill!)
  133. (let ((string*
  134. (string-append (string-copy string 0 start)
  135. (make-string (- end start) fill)
  136. (string-copy string end (string-length string)))))
  137. (%mutable-string-set-str! string string*)))
  138. (define string-for-each
  139. (case-lambda
  140. ((f str) (for-each f (string->list str)))
  141. ((f str . strs)
  142. (apply for-each f (string->list str) (map string->list strs)))))
  143. ;; TODO: Support n strings, our 'map' doesn't support n lists yet.
  144. (define (string-map f str)
  145. (list->string (map f (string->list str))))
  146. (define (%string-compare a b)
  147. (if (eq? a b)
  148. 0
  149. (%inline-wasm
  150. '(func (param $a (ref string))
  151. (param $b (ref string))
  152. (result (ref eq))
  153. (ref.i31 (i32.shl (string.compare (local.get $a) (local.get $b))
  154. (i32.const 1))))
  155. a b)))
  156. (define (%string-compare* ordered? x y strs)
  157. (check-type x string? 'string-compare)
  158. (check-type y string? 'string-compare)
  159. (for-each (lambda (s) (check-type s string? 'string-compare)) strs)
  160. (define (pred a b) (ordered? (%string-compare a b) 0))
  161. (and (pred x y)
  162. (let lp ((y y) (strs strs))
  163. (match strs
  164. (() #t)
  165. ((z . strs) (and (pred y z) (lp z strs)))))))
  166. (define (string<? x y . strs) (%string-compare* < x y strs))
  167. (define (string<=? x y . strs) (%string-compare* <= x y strs))
  168. (define (string=? x y . strs) (%string-compare* = x y strs))
  169. (define (string>=? x y . strs) (%string-compare* >= x y strs))
  170. (define (string>? x y . strs) (%string-compare* > x y strs))
  171. (define (list->string chars)
  172. (define utf8-length
  173. (let lp ((len 0) (chars chars))
  174. (match chars
  175. (() len)
  176. ((ch . chars)
  177. (lp (+ len (let ((i (char->integer ch)))
  178. (cond
  179. ((<= i #x7f) 1)
  180. ((<= i #x7ff) 2)
  181. ((<= i #xffff) 3)
  182. (else 4))))
  183. chars)))))
  184. (define bv (make-bytevector utf8-length 0))
  185. (let lp ((pos 0) (chars chars))
  186. (match chars
  187. (() (%utf8->string bv))
  188. ((ch . chars)
  189. (lp
  190. (+ pos
  191. (let ((i (char->integer ch)))
  192. (define (low-six i) (logand i #b111111))
  193. (define (put! offset byte)
  194. (bytevector-u8-set! bv (+ pos offset) byte))
  195. (cond
  196. ((<= i #x7f)
  197. (put! 0 i)
  198. 1)
  199. ((<= i #x7ff)
  200. (put! 0 (logior #b11000000 (ash i -6)))
  201. (put! 1 (logior #b10000000 (low-six i)))
  202. 2)
  203. ((<= i #xffff)
  204. (put! 0 (logior #b11100000 (ash i -12)))
  205. (put! 1 (logior #b10000000 (low-six (ash i -6))))
  206. (put! 2 (logior #b10000000 (low-six i)))
  207. 3)
  208. (else
  209. (put! 0 (logior #b11110000 (ash i -18)))
  210. (put! 1 (logior #b10000000 (low-six (ash i -12))))
  211. (put! 2 (logior #b10000000 (low-six (ash i -6))))
  212. (put! 3 (logior #b10000000 (low-six i)))
  213. 4))))
  214. chars)))))
  215. (define* (string->list str #:optional (start 0) (end (string-length str)))
  216. (check-type str string? 'string->list)
  217. (check-range start 0 (string-length str) 'string->list)
  218. (check-range end start (string-length str) 'string->list)
  219. (%inline-wasm
  220. '(func (param $s (ref string)) (param $start i32) (param $end i32)
  221. (result (ref eq))
  222. (local $str_iter (ref stringview_iter))
  223. (local $s0 (ref eq))
  224. (local $i0 i32)
  225. (local.set $str_iter (string.as_iter (local.get $s)))
  226. (local.set $s0
  227. (struct.new $mutable-pair
  228. (i32.const 0)
  229. (ref.i31 (i32.const 1))
  230. (ref.i31 (i32.const 13))))
  231. (local.set $i0
  232. (i32.sub (local.get $end) (local.get $start)))
  233. (drop
  234. (stringview_iter.advance (local.get $str_iter) (local.get $start)))
  235. (ref.cast $mutable-pair (local.get $s0))
  236. (loop $lp
  237. (if (local.get $i0)
  238. (then
  239. (ref.cast $mutable-pair (local.get $s0))
  240. (local.tee
  241. $s0
  242. (struct.new $mutable-pair
  243. (i32.const 0)
  244. (ref.i31
  245. (i32.add
  246. (i32.shl (stringview_iter.next (local.get $str_iter))
  247. (i32.const 2))
  248. (i32.const #b11)))
  249. (ref.i31 (i32.const 13))))
  250. (struct.set $mutable-pair $cdr)
  251. (local.set $i0 (i32.sub (local.get $i0) (i32.const 1)))
  252. (br $lp))))
  253. (struct.get $mutable-pair $cdr))
  254. str start end))
  255. (define (string-utf8-length str) (%string-utf8-length str))
  256. (define string->utf8
  257. (case-lambda
  258. ((str) (%string->utf8 str))
  259. ((str start) (%string->utf8
  260. (if (zero? start)
  261. str
  262. (string-copy str start))))
  263. ((str start end) (%string->utf8
  264. (if (and (zero? start) (eq? end (string-length str)))
  265. str
  266. (string-copy str start end))))))
  267. (define utf8->string
  268. (case-lambda
  269. ((bv) (%utf8->string bv))
  270. ((bv start) (%utf8->string
  271. (if (zero? start)
  272. bv
  273. (bytevector-copy bv start))))
  274. ((bv start end) (%utf8->string
  275. (if (and (zero? start) (eq? end (bytevector-length bv)))
  276. bv
  277. (bytevector-copy bv start end)))))))