strings.test 17 KB


  1. ;;;; strings.test --- test suite for Guile's string functions -*- scheme -*-
  2. ;;;; Jim Blandy <jimb@red-bean.com> --- August 1999
  3. ;;;;
  4. ;;;; Copyright (C) 1999, 2001, 2004, 2005, 2006, 2008, 2009, 2010,
  5. ;;;; 2011 Free Software Foundation, Inc.
  6. ;;;;
  7. ;;;; This library is free software; you can redistribute it and/or
  8. ;;;; modify it under the terms of the GNU Lesser General Public
  9. ;;;; License as published by the Free Software Foundation; either
  10. ;;;; version 3 of the License, or (at your option) any later version.
  11. ;;;;
  12. ;;;; This library is distributed in the hope that it will be useful,
  13. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;;;; Lesser General Public License for more details.
  16. ;;;;
  17. ;;;; You should have received a copy of the GNU Lesser General Public
  18. ;;;; License along with this library; if not, write to the Free Software
  19. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. (define-module (test-strings)
  21. #:use-module ((system base compile) #:select (compile))
  22. #:use-module (test-suite lib))
  ;; Exception descriptor (exception key paired with a message pattern) used
  ;; by the tests below that expect a write to a read-only string to fail.
  (define exception:read-only-string
    (let ((key 'misc-error)
          (message-pattern "^string is read-only"))
      (cons key message-pattern)))
  ;; Exception descriptor (exception key paired with a message pattern) used
  ;; by the "escapes" tests, which feed malformed string literals to `read'.
  (define exception:illegal-escape
    (let ((key 'read-error)
          (message-pattern "illegal character in escape sequence"))
      (cons key message-pattern)))
  ;; A wrong-type error may carry either the 'wrong-type-arg key (when the
  ;; code is interpreted) or 'vm-error (when it is compiled).  Putting #t in
  ;; the key position is what lets this descriptor match both.
  (define exception:wrong-type-arg
    (let ((any-key #t)
          (message-pattern "Wrong type"))
      (cons any-key message-pattern)))
  ;; Build a string whose characters are the given integer code points, in
  ;; argument order.  Example: (string-ints 65) => "A".  With no arguments
  ;; the result is the empty string.
  (define (string-ints . code-points)
    (list->string (map integer->char code-points)))
  34. ;;
  35. ;; string internals
  36. ;;
  37. ;; Some abbreviations
  38. ;; BMP - Basic Multilingual Plane (codepoints from U+0000 to U+FFFF)
  39. ;; SMP - Supplementary Multilingual Plane (codepoints from U+10000 to U+1FFFF)
  ;; Checks on the internal layout of strings.  Every test inspects the
  ;; association list returned by `%string-dump' (looked up with `assq-ref'):
  ;; the 'start offset, the 'stringbuf-* entries, and the 'read-only and
  ;; 'shared entries.
  (with-test-prefix "string internals"

    (pass-if "new string starts at 1st char in stringbuf"
      (let ((s "abc"))
        (= 0 (assq-ref (%string-dump s) 'start))))

    (pass-if "length of new string same as stringbuf"
      (let ((s "def"))
        (= (string-length s) (assq-ref (%string-dump s) 'stringbuf-length))))

    (pass-if "contents of new string same as stringbuf"
      (let ((s "ghi"))
        (string=? s (assq-ref (%string-dump s) 'stringbuf-chars))))

    (pass-if "writable strings are not read-only"
      (let ((s "zyx"))
        (not (assq-ref (%string-dump s) 'read-only))))

    (pass-if "read-only strings are read-only"
      (let ((s (substring/read-only "zyx" 0)))
        (assq-ref (%string-dump s) 'read-only)))

    (pass-if "new Latin-1 encoded strings are not shared"
      (let ((s "abc"))
        (not (assq-ref (%string-dump s) 'stringbuf-shared))))

    (pass-if "new UCS-4 encoded strings are not shared"
      (let ((s "\u0100bc"))
        (not (assq-ref (%string-dump s) 'stringbuf-shared))))

    ;; Should this be true?  It isn't currently true, so the test reports
    ;; itself as untested before reaching the comparison.
    (pass-if "null shared substrings are shared"
      (let* ((s1 "")
             (s2 (substring/shared s1 0 0)))
        (throw 'untested)
        (eq? (assq-ref (%string-dump s2) 'shared)
             s1)))

    (pass-if "ASCII shared substrings are shared"
      (let* ((s1 "foobar")
             (s2 (substring/shared s1 0 3)))
        (eq? (assq-ref (%string-dump s2) 'shared)
             s1)))

    (pass-if "BMP shared substrings are shared"
      (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
             (s2 (substring/shared s1 0 3)))
        (eq? (assq-ref (%string-dump s2) 'shared)
             s1)))

    (pass-if "null substrings are not shared"
      (let* ((s1 "")
             (s2 (substring s1 0 0)))
        (not (eq? (assq-ref (%string-dump s2) 'shared)
                  s1))))

    (pass-if "ASCII substrings are not shared"
      (let* ((s1 "foobar")
             (s2 (substring s1 0 3)))
        (not (eq? (assq-ref (%string-dump s2) 'shared)
                  s1))))

    (pass-if "BMP substrings are not shared"
      (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
             (s2 (substring s1 0 3)))
        (not (eq? (assq-ref (%string-dump s2) 'shared)
                  s1))))

    ;; In the next two tests s2 is deliberately never read: the assertion
    ;; inspects s1 after a substring of it has been taken.
    (pass-if "ASCII substrings share stringbufs before copy-on-write"
      (let* ((s1 "foobar")
             (s2 (substring s1 0 3)))
        (assq-ref (%string-dump s1) 'stringbuf-shared)))

    (pass-if "BMP substrings share stringbufs before copy-on-write"
      (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
             (s2 (substring s1 0 3)))
        (assq-ref (%string-dump s1) 'stringbuf-shared)))

    ;; Writing to the substring is the step these two tests exercise; the
    ;; assertion then checks that s2's stringbuf is no longer flagged shared.
    (pass-if "ASCII substrings don't share stringbufs after copy-on-write"
      (let* ((s1 "foobar")
             (s2 (substring s1 0 3)))
        (string-set! s2 0 #\F)
        (not (assq-ref (%string-dump s2) 'stringbuf-shared))))

    (pass-if "BMP substrings don't share stringbufs after copy-on-write"
      (let* ((s1 "\u0100\u0101\u0102\u0103\u0104\u0105")
             (s2 (substring s1 0 3)))
        (string-set! s2 0 #\F)
        (not (assq-ref (%string-dump s2) 'stringbuf-shared))))

    ;; The 'stringbuf-wide entry is expected to be false for strings whose
    ;; characters all fit in Latin-1 and true otherwise.
    (with-test-prefix "encodings"

      (pass-if "null strings are Latin-1 encoded"
        (let ((s ""))
          (not (assq-ref (%string-dump s) 'stringbuf-wide))))

      (pass-if "ASCII strings are Latin-1 encoded"
        (let ((s "jkl"))
          (not (assq-ref (%string-dump s) 'stringbuf-wide))))

      (pass-if "Latin-1 strings are Latin-1 encoded"
        (let ((s "\xC0\xC1\xC2"))
          (not (assq-ref (%string-dump s) 'stringbuf-wide))))

      ;; Each escape below uses the form that matches its digit count: \x
      ;; consumes exactly two hex digits and \u exactly four (see the "extra
      ;; hex digits ignored" tests in the "escapes" section), so writing
      ;; "\x0102" or "\u010301" would not produce the single character the
      ;; test name describes.
      (pass-if "BMP strings are UCS-4 encoded"
        (let ((s "\u0100\u0101\u0102"))
          (assq-ref (%string-dump s) 'stringbuf-wide)))

      (pass-if "SMP strings are UCS-4 encoded"
        (let ((s "\U010300\U010301\U010302"))
          (assq-ref (%string-dump s) 'stringbuf-wide)))

      (pass-if "null list->string is Latin-1 encoded"
        (let ((s (string-ints)))
          (not (assq-ref (%string-dump s) 'stringbuf-wide))))

      (pass-if "ASCII list->string is Latin-1 encoded"
        (let ((s (string-ints 65 66 67)))
          (not (assq-ref (%string-dump s) 'stringbuf-wide))))

      (pass-if "Latin-1 list->string is Latin-1 encoded"
        (let ((s (string-ints #xc0 #xc1 #xc2)))
          (not (assq-ref (%string-dump s) 'stringbuf-wide))))

      (pass-if "BMP list->string is UCS-4 encoded"
        (let ((s (string-ints #x0100 #x0101 #x0102)))
          (assq-ref (%string-dump s) 'stringbuf-wide)))

      (pass-if "SMP list->string is UCS-4 encoded"
        (let ((s (string-ints #x010300 #x010301 #x010302)))
          (assq-ref (%string-dump s) 'stringbuf-wide)))

      ;; U+0040 written with the six-digit escape is still a Latin-1
      ;; character, so the string must not be stored wide.
      (pass-if "encoding of string not based on escape style"
        (let ((s "\U000040"))
          (not (assq-ref (%string-dump s) 'stringbuf-wide))))))
  ;; Backslash escapes inside string literals.  The malformed cases are
  ;; passed to `read' as text at test time and are expected to raise
  ;; `exception:illegal-escape'; the well-formed cases compare the literal
  ;; against characters built another way.
  (with-test-prefix "escapes"

    (pass-if-exception "non-hex char in two-digit hex-escape"
      exception:illegal-escape
      (with-input-from-string "\"\\x0g\"" (lambda () (read))))

    (pass-if-exception "non-hex char in four-digit hex-escape"
      exception:illegal-escape
      (with-input-from-string "\"\\u000g\"" (lambda () (read))))

    (pass-if-exception "non-hex char in six-digit hex-escape"
      exception:illegal-escape
      (with-input-from-string "\"\\U00000g\"" (lambda () (read))))

    (pass-if-exception "premature termination of two-digit hex-escape"
      exception:illegal-escape
      (with-input-from-string "\"\\x0\"" (lambda () (read))))

    (pass-if-exception "premature termination of four-digit hex-escape"
      exception:illegal-escape
      (with-input-from-string "\"\\u000\"" (lambda () (read))))

    (pass-if-exception "premature termination of six-digit hex-escape"
      exception:illegal-escape
      (with-input-from-string "\"\\U00000\"" (lambda () (read))))

    ;; In the next three tests the trailing "f" is not part of the escape:
    ;; index 2 must hold the character named by the fixed-width escape.
    (pass-if "extra hex digits ignored for two-digit hex escape"
      (let ((third-char (string-ref "--\xfff--" 2)))
        (eqv? third-char (integer->char #xff))))

    (pass-if "extra hex digits ignored for four-digit hex escape"
      (let ((third-char (string-ref "--\u0100f--" 2)))
        (eqv? third-char (integer->char #x0100))))

    (pass-if "extra hex digits ignored for six-digit hex escape"
      (let ((third-char (string-ref "--\U010300f--" 2)))
        (eqv? third-char (integer->char #x010300))))

    (pass-if "escaped characters match non-escaped ASCII characters"
      (let ((escaped "\x41\u0042\U000043"))
        (string=? "ABC" escaped)))

    (pass-if "R5RS backslash escapes"
      (let ((expected (string #\" #\\)))
        (string=? "\"\\" expected)))

    (pass-if "R6RS backslash escapes"
      (let ((expected (string #\alarm #\backspace #\tab #\newline #\vtab
                              #\page #\return)))
        (string=? "\a\b\t\n\v\f\r" expected)))

    (pass-if "Guile extensions backslash escapes"
      (let ((expected (string #\nul)))
        (string=? "\0" expected))))
  184. ;;
  185. ;; string?
  186. ;;
  ;; `string?' must accept a string literal and reject a symbol.
  (with-test-prefix "string?"

    (pass-if "string"
      (let ((candidate "abc"))
        (string? candidate)))

    (pass-if "symbol"
      (let ((candidate 'abc))
        (not (string? candidate)))))
  192. ;;
  193. ;; literals
  194. ;;
  ;; String literals in compiled code are expected to be immutable.
  (with-test-prefix "literals"

    ;; From the "Storage Model" section of R5RS: "In such systems literal
    ;; constants and the strings returned by `symbol->string' are
    ;; immutable objects".  `eval' doesn't support it yet, but that doesn't
    ;; really matter because, unlike the bytecode compiler, `eval' doesn't
    ;; coalesce repeated constants.  Hence the form is run through
    ;; `compile' rather than evaluated directly.
    (pass-if-exception "literals are constant"
      exception:read-only-string
      (let ((mutating-form '(string-set! "literal string" 0 #\x)))
        (compile mutating-form
                 #:from 'scheme
                 #:to 'value))))
  206. ;;
  207. ;; string-null?
  208. ;;
  ;; `string-null?' is true only for the empty string; a string holding a
  ;; single NUL character is not empty, and a symbol argument is an error.
  (with-test-prefix "string-null?"

    (pass-if "null string"
      (let ((empty ""))
        (string-null? empty)))

    (pass-if "non-null string"
      (let ((one-char "a"))
        (not (string-null? one-char))))

    (pass-if "respects \\0"
      (let ((nul-only "\0"))
        (not (string-null? nul-only))))

    (pass-if-exception "symbol"
      exception:wrong-type-arg
      (string-null? 'a)))
  219. ;;
  220. ;; string=?
  221. ;;
  ;; `string=?' must compare whole strings: a trailing NUL character makes
  ;; two strings different whichever argument carries it.  Non-string
  ;; arguments (a symbol, or the end-of-file object obtained by reading from
  ;; an empty string) must raise a wrong-type error.
  (with-test-prefix "string=?"

    (pass-if "respects 1st parameter's string length"
      (not (string=? "foo\0" "foo")))

    ;; Test name spelling corrected ("paramter's" -> "parameter's") so it
    ;; matches the 1st-parameter test above.
    (pass-if "respects 2nd parameter's string length"
      (not (string=? "foo" "foo\0")))

    (with-test-prefix "wrong argument type"

      (pass-if-exception "1st argument symbol"
        exception:wrong-type-arg
        (string=? 'a "a"))

      (pass-if-exception "2nd argument symbol"
        exception:wrong-type-arg
        (string=? "a" 'b))

      (pass-if-exception "1st argument EOF"
        exception:wrong-type-arg
        (string=? (with-input-from-string "" read) "b"))

      (pass-if-exception "2nd argument EOF"
        exception:wrong-type-arg
        (string=? "a" (with-input-from-string "" read)))))
  240. ;;
  241. ;; string<?
  242. ;;
  ;; `string<?' must look past an embedded NUL character, reject symbol
  ;; arguments, and order one-character strings the way `char<?' orders the
  ;; characters themselves.
  (with-test-prefix "string<?"

    (pass-if "respects string length"
      (if (string<? "foo\0a" "foo\0a")
          #f
          (string<? "foo\0a" "foo\0b")))

    (with-test-prefix "wrong argument type"

      (pass-if-exception "1st argument symbol"
        exception:wrong-type-arg
        (string<? 'a "a"))

      (pass-if-exception "2nd argument symbol"
        exception:wrong-type-arg
        (string<? "a" 'b)))

    (pass-if "same as char<?"
      (let ((low 0)
            (high 255))
        (eq? (char<? (integer->char low) (integer->char high))
             (string<? (string-ints low) (string-ints high))))))
  257. ;;
  258. ;; string-ci<?
  259. ;;
  ;; `string-ci<?' must look past an embedded NUL character, reject symbol
  ;; arguments, and order one-character strings the way `char-ci<?' orders
  ;; the characters themselves.
  (with-test-prefix "string-ci<?"

    (pass-if "respects string length"
      (if (string-ci<? "foo\0a" "foo\0a")
          #f
          (string-ci<? "foo\0a" "foo\0b")))

    (with-test-prefix "wrong argument type"

      (pass-if-exception "1st argument symbol"
        exception:wrong-type-arg
        (string-ci<? 'a "a"))

      (pass-if-exception "2nd argument symbol"
        exception:wrong-type-arg
        (string-ci<? "a" 'b)))

    (pass-if "same as char-ci<?"
      (let ((low 0)
            (high 255))
        (eq? (char-ci<? (integer->char low) (integer->char high))
             (string-ci<? (string-ints low) (string-ints high))))))
  274. ;;
  275. ;; string<=?
  276. ;;
  ;; Comparing the one-character strings for code points 0 and 255 must give
  ;; the same answer as `char<=?' on the characters themselves.
  (with-test-prefix "string<=?"

    (pass-if "same as char<=?"
      (let ((low 0)
            (high 255))
        (eq? (char<=? (integer->char low) (integer->char high))
             (string<=? (string-ints low) (string-ints high))))))
  281. ;;
  282. ;; string-ci<=?
  283. ;;
  ;; Comparing the one-character strings for code points 0 and 255 must give
  ;; the same answer as `char-ci<=?' on the characters themselves.
  (with-test-prefix "string-ci<=?"

    (pass-if "same as char-ci<=?"
      (let ((low 0)
            (high 255))
        (eq? (char-ci<=? (integer->char low) (integer->char high))
             (string-ci<=? (string-ints low) (string-ints high))))))
  288. ;;
  289. ;; string>?
  290. ;;
  ;; Comparing the one-character strings for code points 0 and 255 must give
  ;; the same answer as `char>?' on the characters themselves.
  (with-test-prefix "string>?"

    (pass-if "same as char>?"
      (let ((low 0)
            (high 255))
        (eq? (char>? (integer->char low) (integer->char high))
             (string>? (string-ints low) (string-ints high))))))
  295. ;;
  296. ;; string-ci>?
  297. ;;
  ;; Comparing the one-character strings for code points 0 and 255 must give
  ;; the same answer as `char-ci>?' on the characters themselves.
  (with-test-prefix "string-ci>?"

    (pass-if "same as char-ci>?"
      (let ((low 0)
            (high 255))
        (eq? (char-ci>? (integer->char low) (integer->char high))
             (string-ci>? (string-ints low) (string-ints high))))))
  302. ;;
  303. ;; string>=?
  304. ;;
  ;; Comparing the one-character strings for code points 0 and 255 must give
  ;; the same answer as `char>=?' on the characters themselves.
  (with-test-prefix "string>=?"

    (pass-if "same as char>=?"
      (let ((low 0)
            (high 255))
        (eq? (char>=? (integer->char low) (integer->char high))
             (string>=? (string-ints low) (string-ints high))))))
  309. ;;
  310. ;; string-ci>=?
  311. ;;
  ;; Comparing the one-character strings for code points 0 and 255 must give
  ;; the same answer as `char-ci>=?' on the characters themselves.
  (with-test-prefix "string-ci>=?"

    (pass-if "same as char-ci>=?"
      (let ((low 0)
            (high 255))
        (eq? (char-ci>=? (integer->char low) (integer->char high))
             (string-ci>=? (string-ints low) (string-ints high))))))
  316. ;;
  317. ;; Unicode string normalization forms
  318. ;;
  319. ;;
  320. ;; string-normalize-nfd
  321. ;;
  ;; U+00E9 must decompose to U+0065 followed by U+0301.
  (with-test-prefix "string-normalize-nfd"

    (pass-if "canonical decomposition is equal?"
      (let ((decomposed (string-normalize-nfd "\xe9")))
        (equal? decomposed "\x65\u0301"))))
  325. ;;
  326. ;; string-normalize-nfkd
  327. ;;
  ;; U+1E9B U+0323 must decompose to "s" followed by U+0323 and U+0307.
  (with-test-prefix "string-normalize-nfkd"

    (pass-if "compatibility decomposition is equal?"
      (let ((decomposed (string-normalize-nfkd "\u1e9b\u0323")))
        (equal? decomposed "s\u0323\u0307"))))
  331. ;;
  332. ;; string-normalize-nfc
  333. ;;
  ;; U+0065 followed by U+0301 must compose to U+00E9.
  (with-test-prefix "string-normalize-nfc"

    (pass-if "canonical composition is equal?"
      (let ((composed (string-normalize-nfc "\x65\u0301")))
        (equal? composed "\xe9"))))
  337. ;;
  338. ;; string-normalize-nfkc
  339. ;;
  ;; U+1E9B U+0323 must compose to the single character U+1E69.
  (with-test-prefix "string-normalize-nfkc"

    (pass-if "compatibility composition is equal?"
      (let ((composed (string-normalize-nfkc "\u1e9b\u0323")))
        (equal? composed "\u1e69"))))
  343. ;;
  344. ;; string-ref
  345. ;;
  ;; `string-ref': indices outside the string (including any index into the
  ;; empty string and negative indices) must raise an out-of-range error;
  ;; valid indices must return the expected character for ASCII, Latin-1,
  ;; BMP and SMP contents.
  (with-test-prefix "string-ref"

    (pass-if-exception "empty string"
      exception:out-of-range
      (string-ref "" 0))

    (pass-if-exception "empty string and non-zero index"
      exception:out-of-range
      (string-ref "" 123))

    (pass-if-exception "out of range"
      exception:out-of-range
      (string-ref "hello" 123))

    (pass-if-exception "negative index"
      exception:out-of-range
      (string-ref "hello" -1))

    (pass-if "regular string, ASCII char"
      (let ((found (string-ref "GNU Guile" 4)))
        (char=? found #\G)))

    (pass-if "regular string, hex escaped Latin-1 char"
      (let ((found (string-ref "--\xff--" 2)))
        (char=? found (integer->char #xff))))

    (pass-if "regular string, hex escaped BMP char"
      (let ((found (string-ref "--\u0100--" 2)))
        (char=? found (integer->char #x0100))))

    (pass-if "regular string, hex escaped SMP char"
      (let ((found (string-ref "--\U010300--" 2)))
        (char=? found (integer->char #x010300)))))
  370. ;;
  371. ;; string-set!
  372. ;;
  ;; `string-set!': out-of-range indices and read-only targets must raise the
  ;; corresponding error; otherwise the character written at index 4 must be
  ;; the character read back, for ASCII, Latin-1, BMP and SMP characters.
  ;; The writable targets are made with `string-copy' rather than used as
  ;; bare literals.
  (with-test-prefix "string-set!"

    (pass-if-exception "empty string"
      exception:out-of-range
      (string-set! (string-copy "") 0 #\x))

    (pass-if-exception "empty string and non-zero index"
      exception:out-of-range
      (string-set! (string-copy "") 123 #\x))

    (pass-if-exception "out of range"
      exception:out-of-range
      (string-set! (string-copy "hello") 123 #\x))

    (pass-if-exception "negative index"
      exception:out-of-range
      (string-set! (string-copy "hello") -1 #\x))

    (pass-if-exception "read-only string"
      exception:read-only-string
      (string-set! (substring/read-only "abc" 0) 1 #\space))

    (pass-if "regular string, ASCII char"
      (let ((target (string-copy "GNU guile")))
        (string-set! target 4 #\G)
        (char=? #\G (string-ref target 4))))

    (pass-if "regular string, Latin-1 char"
      (let ((target (string-copy "GNU guile")))
        (string-set! target 4 (integer->char #xfe))
        (char=? (integer->char #xfe) (string-ref target 4))))

    (pass-if "regular string, BMP char"
      (let ((target (string-copy "GNU guile")))
        (string-set! target 4 (integer->char #x0100))
        (char=? (integer->char #x0100) (string-ref target 4))))

    (pass-if "regular string, SMP char"
      (let ((target (string-copy "GNU guile")))
        (string-set! target 4 (integer->char #x010300))
        (char=? (integer->char #x010300) (string-ref target 4)))))
  405. ;;
  406. ;; string (applied to a list of characters)
  407. ;;
  ;; Applying `string' to a circular list of characters must raise a
  ;; wrong-type error.
  (with-test-prefix "string"

    (pass-if-exception "convert circular list to string"
      exception:wrong-type-arg
      (let* ((chars (list #\a #\b #\c))
             (final-pair (cddr chars)))
        ;; Point the last pair back at the second pair to close the cycle.
        (set-cdr! final-pair (cdr chars))
        (apply string chars))))
  ;; Splitting on the character with code point 255.
  (with-test-prefix "string-split"

    ;; in guile 1.6.7 and earlier, character >=128 wasn't matched in the string
    (pass-if "char 255"
      (let* ((delimiter (integer->char 255))
             (input (string #\a delimiter #\b)))
        (equal? '("a" "b")
                (string-split input delimiter)))))
  ;; A call whose start index (3) is greater than its end index (0) must
  ;; raise an out-of-range error.
  (with-test-prefix "substring-move!"

    (pass-if-exception "substring-move! checks start and end correctly"
      exception:out-of-range
      (let ((start 3)
            (end 0))
        (substring-move! "sample" start end "test" 3))))
  ;; Upcasing a `substring/shared' view must change the string the view was
  ;; taken from, and must leave untouched any string that view's parent was
  ;; copied from (via `string-copy' or `substring').
  (with-test-prefix "substring/shared"

    (pass-if "modify indirectly"
      (let* ((base (string-copy "foofoofoo"))
             (view (substring/shared base 3 6)))
        (string-upcase! view)
        (string=? base "fooFOOfoo")))

    (pass-if "modify cow indirectly"
      (let* ((original (string-copy "foofoofoo"))
             (duplicate (string-copy original))
             (view (substring/shared duplicate 3 6)))
        (string-upcase! view)
        (and (string=? original "foofoofoo")
             (string=? duplicate "fooFOOfoo"))))

    (pass-if "modify double indirectly"
      (let* ((base (string-copy "foofoofoo"))
             (outer-view (substring/shared base 2 7))
             (inner-view (substring/shared outer-view 1 4)))
        (string-upcase! inner-view)
        (string=? base "fooFOOfoo")))

    (pass-if "modify cow double indirectly"
      (let* ((original "foofoofoo")
             (piece (substring original 2 7))
             (view (substring/shared piece 1 4)))
        (string-upcase! view)
        (and (string=? original "foofoofoo")
             (string=? piece "oFOOf")))))