strings.scm 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. ;;; Strings
  2. ;;; Copyright (C) 2024 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; Strings.
  18. ;;;
  19. ;;; Code:
  20. (library (hoot strings)
  21. (export string?
  22. mutable-string?
  23. string-length
  24. string-ref
  25. string-set!
  26. string
  27. make-string
  28. string-append
  29. string-copy
  30. substring
  31. string-copy!
  32. string-fill!
  33. string-for-each
  34. string-map
  35. string<?
  36. string<=?
  37. string=?
  38. string>=?
  39. string>?
  40. list->string
  41. string->list
  42. string-utf8-length
  43. string->utf8
  44. utf8->string)
  45. (import (only (hoot primitives)
  46. %string? %string-length %string-ref
  47. %utf8->string %string->utf8 %string-utf8-length
  48. guile:string->list)
  49. (hoot apply)
  50. (hoot bitwise)
  51. (hoot bytevectors)
  52. (hoot char)
  53. (hoot cond-expand)
  54. (hoot eq)
  55. (hoot errors)
  56. (hoot inline-wasm)
  57. (hoot lists)
  58. (hoot match)
  59. (hoot numbers)
  60. (hoot pairs)
  61. (hoot syntax))
  62. ;; R7RS strings
  63. (define (string? x) (%string? x))
  64. (define (mutable-string? x)
  65. (%inline-wasm '(func (param $obj (ref eq))
  66. (result (ref eq))
  67. (if (ref eq)
  68. (ref.test $mutable-string (local.get $obj))
  69. (then (ref.i31 (i32.const 17)))
  70. (else (ref.i31 (i32.const 1)))))
  71. x))
  72. (define (string-length x) (%string-length x))
  73. (define (string-ref x i) (%string-ref x i))
  74. (define (%mutable-string-set-str! x x*)
  75. (check-type x mutable-string? '%mutable-string-set-str!)
  76. (check-type x* string? '%mutable-string-set-str!)
  77. (%inline-wasm '(func (param $s (ref $mutable-string))
  78. (param $new-s (ref $string))
  79. (struct.set $mutable-string
  80. $str
  81. (local.get $s)
  82. (struct.get $string
  83. $str
  84. (local.get $new-s))))
  85. x x*)
  86. (if #f #f))
  87. (define (string-set! x i v)
  88. (check-type x mutable-string? 'string-set!)
  89. (check-range i 0 (1- (string-length x)) 'string-set!)
  90. (check-type v char? 'string-set!)
  91. (let ((x* (string-append (string-copy x 0 i)
  92. (string v)
  93. (string-copy x (1+ i) (string-length x)))))
  94. (%mutable-string-set-str! x x*)))
  95. (define (string . chars) (list->string chars))
  96. (define* (make-string n #:optional (init #\space))
  97. (check-type init char? 'make-string)
  98. (let lp ((n n) (chars '()))
  99. (if (zero? n)
  100. (list->string chars)
  101. (lp (1- n) (cons init chars)))))
  102. (define (string-append . strs)
  103. (utf8->string (bytevector-concatenate (map string->utf8 strs))))
  104. (define* (string-copy str #:optional (start 0) (end (string-length str)))
  105. (check-type str string? 'string-copy)
  106. (check-range start 0 (string-length str) 'string-copy)
  107. (check-range end start (string-length str) 'string-copy)
  108. (%inline-wasm
  109. '(func (param $str (ref string))
  110. (param $start i32)
  111. (param $end i32)
  112. (result (ref eq))
  113. (local $str_iter (ref stringview_iter))
  114. (local.set $str_iter (string.as_iter (local.get $str)))
  115. (drop
  116. (stringview_iter.advance (local.get $str_iter) (local.get $start)))
  117. (struct.new $mutable-string
  118. (i32.const 0)
  119. (stringview_iter.slice (local.get $str_iter)
  120. (i32.sub (local.get $end)
  121. (local.get $start)))))
  122. str start end))
  123. (define (substring str start end)
  124. (string-copy str start end))
  125. (define* (string-copy! to at from #:optional (start 0) (end (string-length from)))
  126. (check-type to mutable-string? 'string-copy!)
  127. (check-range at 0 (string-length to) 'string-copy!)
  128. (check-type from string? 'string-copy!)
  129. (assert (<= (- end start) (- (string-length to) at)) 'string-copy!)
  130. (let ((to* (string-append (string-copy to 0 at)
  131. (string-copy from start end)
  132. (string-copy to (+ at (- end start))))))
  133. (%mutable-string-set-str! to to*)))
  134. (define* (string-fill! string fill
  135. #:optional (start 0) (end (string-length string)))
  136. (check-type string mutable-string? 'string-fill!)
  137. (check-type fill char? 'string-fill!)
  138. (check-range start 0 (string-length string) 'string-fill!)
  139. (check-range end start (string-length string) 'string-fill!)
  140. (let ((string*
  141. (string-append (string-copy string 0 start)
  142. (make-string (- end start) fill)
  143. (string-copy string end (string-length string)))))
  144. (%mutable-string-set-str! string string*)))
  145. (define string-for-each
  146. (case-lambda
  147. ((f str) (for-each f (string->list str)))
  148. ((f str . strs)
  149. (apply for-each f (string->list str) (map string->list strs)))))
  150. ;; TODO: Support n strings, our 'map' doesn't support n lists yet.
  151. (define (string-map f str)
  152. (list->string (map f (string->list str))))
  153. (define (%string-compare a b)
  154. (if (eq? a b)
  155. 0
  156. (%inline-wasm
  157. '(func (param $a (ref string))
  158. (param $b (ref string))
  159. (result (ref eq))
  160. (ref.i31 (i32.shl (string.compare (local.get $a) (local.get $b))
  161. (i32.const 1))))
  162. a b)))
  163. (define (%string-compare* ordered? x y strs)
  164. (check-type x string? 'string-compare)
  165. (check-type y string? 'string-compare)
  166. (for-each (lambda (s) (check-type s string? 'string-compare)) strs)
  167. (define (pred a b) (ordered? (%string-compare a b) 0))
  168. (and (pred x y)
  169. (let lp ((y y) (strs strs))
  170. (match strs
  171. (() #t)
  172. ((z . strs) (and (pred y z) (lp z strs)))))))
  173. (define (string<? x y . strs) (%string-compare* < x y strs))
  174. (define (string<=? x y . strs) (%string-compare* <= x y strs))
  175. (define (string=? x y . strs) (%string-compare* = x y strs))
  176. (define (string>=? x y . strs) (%string-compare* >= x y strs))
  177. (define (string>? x y . strs) (%string-compare* > x y strs))
  178. (define (list->string chars)
  179. (define utf8-length
  180. (let lp ((len 0) (chars chars))
  181. (match chars
  182. (() len)
  183. ((ch . chars)
  184. (lp (+ len (let ((i (char->integer ch)))
  185. (cond
  186. ((<= i #x7f) 1)
  187. ((<= i #x7ff) 2)
  188. ((<= i #xffff) 3)
  189. (else 4))))
  190. chars)))))
  191. (define bv (make-bytevector utf8-length 0))
  192. (let lp ((pos 0) (chars chars))
  193. (match chars
  194. (() (%utf8->string bv))
  195. ((ch . chars)
  196. (lp
  197. (+ pos
  198. (let ((i (char->integer ch)))
  199. (define (low-six i) (logand i #b111111))
  200. (define (put! offset byte)
  201. (bytevector-u8-set! bv (+ pos offset) byte))
  202. (cond
  203. ((<= i #x7f)
  204. (put! 0 i)
  205. 1)
  206. ((<= i #x7ff)
  207. (put! 0 (logior #b11000000 (ash i -6)))
  208. (put! 1 (logior #b10000000 (low-six i)))
  209. 2)
  210. ((<= i #xffff)
  211. (put! 0 (logior #b11100000 (ash i -12)))
  212. (put! 1 (logior #b10000000 (low-six (ash i -6))))
  213. (put! 2 (logior #b10000000 (low-six i)))
  214. 3)
  215. (else
  216. (put! 0 (logior #b11110000 (ash i -18)))
  217. (put! 1 (logior #b10000000 (low-six (ash i -12))))
  218. (put! 2 (logior #b10000000 (low-six (ash i -6))))
  219. (put! 3 (logior #b10000000 (low-six i)))
  220. 4))))
  221. chars)))))
  222. (cond-expand
  223. (guile-vm
  224. (define string->list guile:string->list))
  225. (hoot
  226. (define* (string->list str #:optional (start 0) (end (string-length str)))
  227. (check-type str string? 'string->list)
  228. (check-range start 0 (string-length str) 'string->list)
  229. (check-range end start (string-length str) 'string->list)
  230. (%inline-wasm
  231. '(func (param $s (ref string)) (param $start i32) (param $end i32)
  232. (result (ref eq))
  233. (local $str_iter (ref stringview_iter))
  234. (local $s0 (ref eq))
  235. (local $i0 i32)
  236. (local.set $str_iter (string.as_iter (local.get $s)))
  237. (local.set $s0
  238. (struct.new $mutable-pair
  239. (i32.const 0)
  240. (ref.i31 (i32.const 1))
  241. (ref.i31 (i32.const 13))))
  242. (local.set $i0
  243. (i32.sub (local.get $end) (local.get $start)))
  244. (drop
  245. (stringview_iter.advance (local.get $str_iter) (local.get $start)))
  246. (ref.cast $mutable-pair (local.get $s0))
  247. (loop $lp
  248. (if (local.get $i0)
  249. (then
  250. (ref.cast $mutable-pair (local.get $s0))
  251. (local.tee
  252. $s0
  253. (struct.new $mutable-pair
  254. (i32.const 0)
  255. (ref.i31
  256. (i32.add
  257. (i32.shl (stringview_iter.next (local.get $str_iter))
  258. (i32.const 2))
  259. (i32.const #b11)))
  260. (ref.i31 (i32.const 13))))
  261. (struct.set $mutable-pair $cdr)
  262. (local.set $i0 (i32.sub (local.get $i0) (i32.const 1)))
  263. (br $lp))))
  264. (struct.get $mutable-pair $cdr))
  265. str start end))))
  266. (define (string-utf8-length str) (%string-utf8-length str))
  267. (define string->utf8
  268. (case-lambda
  269. ((str) (%string->utf8 str))
  270. ((str start) (%string->utf8
  271. (if (zero? start)
  272. str
  273. (string-copy str start))))
  274. ((str start end) (%string->utf8
  275. (if (and (zero? start) (eq? end (string-length str)))
  276. str
  277. (string-copy str start end))))))
  278. (define utf8->string
  279. (case-lambda
  280. ((bv) (%utf8->string bv))
  281. ((bv start) (%utf8->string
  282. (if (zero? start)
  283. bv
  284. (bytevector-copy bv start))))
  285. ((bv start end) (%utf8->string
  286. (if (and (zero? start) (eq? end (bytevector-length bv)))
  287. bv
  288. (bytevector-copy bv start end)))))))