regexp.test 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. ;;;; regexp.test --- test Guile's regexps -*- coding: utf-8; mode: scheme -*-
  2. ;;;; Jim Blandy <jimb@red-bean.com> --- September 1999
  3. ;;;;
  4. ;;;; Copyright (C) 1999, 2004, 2006, 2007, 2008, 2009, 2010,
  5. ;;;; 2012, 2013, 2014 Free Software Foundation, Inc.
  6. ;;;;
  7. ;;;; This library is free software; you can redistribute it and/or
  8. ;;;; modify it under the terms of the GNU Lesser General Public
  9. ;;;; License as published by the Free Software Foundation; either
  10. ;;;; version 3 of the License, or (at your option) any later version.
  11. ;;;;
  12. ;;;; This library is distributed in the hope that it will be useful,
  13. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. ;;;; Lesser General Public License for more details.
  16. ;;;;
  17. ;;;; You should have received a copy of the GNU Lesser General Public
  18. ;;;; License along with this library; if not, write to the Free Software
  19. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. (define-module (test-suite test-regexp)
  21. #:use-module (test-suite lib)
  22. #:use-module (srfi srfi-1)
  23. #:use-module (ice-9 regex))
  24. (when (defined? 'setlocale)
  25. (setlocale LC_ALL "C"))
  26. ;; Don't fail if we can't display a test name to stdout/stderr.
  27. (set-port-conversion-strategy! (current-output-port) 'escape)
  28. (set-port-conversion-strategy! (current-error-port) 'escape)
  29. ;;; Run a regexp-substitute or regexp-substitute/global test, once
  30. ;;; providing a real port and once providing #f, requesting direct
  31. ;;; string output.
  32. (define (vary-port func expected . args)
  33. (pass-if "port is string port"
  34. (equal? expected
  35. (call-with-output-string
  36. (lambda (port)
  37. (apply func port args)))))
  38. (pass-if "port is #f"
  39. (equal? expected
  40. (apply func #f args))))
  41. (define (object->string obj)
  42. (call-with-output-string
  43. (lambda (port)
  44. (write obj port))))
  45. ;;;
  46. ;;; make-regexp
  47. ;;;
  48. (with-test-prefix "make-regexp"
  49. (pass-if-exception "no args" exception:wrong-num-args
  50. (make-regexp))
  51. (pass-if-exception "bad pat arg" exception:wrong-type-arg
  52. (make-regexp 'blah))
  53. ;; in guile prior to 1.6.5 make-regex didn't validate its flags args
  54. (pass-if-exception "bad arg 2" exception:wrong-type-arg
  55. (make-regexp "xyz" 'abc))
  56. (pass-if-exception "bad arg 3" exception:wrong-type-arg
  57. (make-regexp "xyz" regexp/icase 'abc)))
  58. ;;;
  59. ;;; match:string
  60. ;;;
  61. (with-test-prefix "match:string"
  62. (pass-if "foo"
  63. (string=? "foo" (match:string (string-match ".*" "foo"))))
  64. (pass-if "foo offset 1"
  65. (string=? "foo" (match:string (string-match ".*" "foo" 1)))))
  66. ;;;
  67. ;;; regexp-exec
  68. ;;;
  69. (with-test-prefix "regexp-exec"
  70. (pass-if-exception "non-integer offset" exception:wrong-type-arg
  71. (let ((re (make-regexp "ab+")))
  72. (regexp-exec re "aaaabbbb" 1.5 'bogus-flags-arg)))
  73. (pass-if-exception "non-string input" exception:wrong-type-arg
  74. (let ((re (make-regexp "ab+")))
  75. (regexp-exec re 'not-a-string)))
  76. (pass-if-exception "non-string input, with offset" exception:wrong-type-arg
  77. (let ((re (make-regexp "ab+")))
  78. (regexp-exec re 'not-a-string 5)))
  79. ;; in guile 1.8.1 and earlier, a #\nul character in the input string was
  80. ;; only detected in a critical section, and the resulting error throw
  81. ;; abort()ed the program
  82. (pass-if-exception "nul in input" exception:string-contains-nul
  83. (let ((re (make-regexp "ab+")))
  84. (regexp-exec re (string #\a #\b (integer->char 0)))))
  85. ;; in guile 1.8.1 and earlier, a bogus flags argument was only detected
  86. ;; inside a critical section, and the resulting error throw abort()ed the
  87. ;; program
  88. (pass-if-exception "non-integer flags" exception:wrong-type-arg
  89. (let ((re (make-regexp "ab+")))
  90. (regexp-exec re "aaaabbbb" 0 'bogus-flags-arg))))
  91. ;;;
  92. ;;; fold-matches
  93. ;;;
  94. (with-test-prefix "fold-matches"
  95. (pass-if "without flags"
  96. (equal? '("hello")
  97. (fold-matches "^[a-z]+$" "hello" '()
  98. (lambda (match result)
  99. (cons (match:substring match)
  100. result)))))
  101. (pass-if "with flags"
  102. ;; Prior to 1.8.6, passing an additional flag would not work.
  103. (null?
  104. (fold-matches "^[a-z]+$" "hello" '()
  105. (lambda (match result)
  106. (cons (match:substring match)
  107. result))
  108. (logior regexp/notbol regexp/noteol))))
  109. (pass-if "regexp/notbol is set correctly"
  110. (equal? '("foo")
  111. (fold-matches "^foo" "foofoofoofoo" '()
  112. (lambda (match result)
  113. (cons (match:substring match)
  114. result))))))
  115. ;;;
  116. ;;; regexp-quote
  117. ;;;
  118. (define-syntax with-ascii-or-latin1-locale
  119. (syntax-rules ()
  120. ((_ chr body ...)
  121. (if (> chr 127)
  122. (with-latin1-locale body ...)
  123. (begin body ...)))))
  124. (define char-code-limit 256)
  125. (with-test-prefix "regexp-quote"
  126. (pass-if-exception "no args" exception:wrong-num-args
  127. (regexp-quote))
  128. (pass-if-exception "bad string arg" exception:wrong-type-arg
  129. (regexp-quote 'blah))
  130. (let ((lst `((regexp/basic ,regexp/basic)
  131. (regexp/extended ,regexp/extended)))
  132. ;; String of all latin-1 characters, except #\nul which doesn't
  133. ;; work because it's the usual end-of-string for the underlying
  134. ;; C regexec().
  135. (allchars (list->string (map integer->char (cdr (iota 256))))))
  136. (for-each
  137. (lambda (elem)
  138. (let ((name (car elem))
  139. (flag (cadr elem)))
  140. (with-test-prefix name
  141. ;; Try on each individual latin-1 character, except #\nul.
  142. (do ((i 1 (1+ i)))
  143. ((>= i 256))
  144. (let* ((c (integer->char i))
  145. (s (string c)))
  146. (pass-if (list "char" i (format #f "~s ~s" c s))
  147. (with-ascii-or-latin1-locale i
  148. (let* ((q (regexp-quote s))
  149. (m (regexp-exec (make-regexp q flag) s)))
  150. (and (= 0 (match:start m))
  151. (= 1 (match:end m))))))))
  152. ;; Try on pattern "aX" where X is each latin-1 character,
  153. ;; except #\nul. This exposes things like "?" which are
  154. ;; special only when they follow a pattern to repeat or
  155. ;; whatever ("a" in this case).
  156. (do ((i 1 (1+ i)))
  157. ((>= i 256))
  158. (let* ((c (integer->char i))
  159. (s (string #\a c))
  160. (q (regexp-quote s)))
  161. (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q))
  162. (with-ascii-or-latin1-locale i
  163. (let* ((m (regexp-exec (make-regexp q flag) s)))
  164. (and (= 0 (match:start m))
  165. (= 2 (match:end m))))))))
  166. (pass-if "string of all chars"
  167. (with-latin1-locale
  168. (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
  169. flag)
  170. allchars)))
  171. (and (= 0 (match:start m))
  172. (= (string-length allchars) (match:end m)))))))))
  173. lst)))
  174. ;;;
  175. ;;; regexp-substitute
  176. ;;;
  177. (with-test-prefix "regexp-substitute"
  178. (let ((match
  179. (string-match "patleft(sub1)patmid(sub2)patright"
  180. "contleftpatleftsub1patmidsub2patrightcontright")))
  181. (define (try expected . args)
  182. (with-test-prefix (object->string args)
  183. (apply vary-port regexp-substitute expected match args)))
  184. (try "")
  185. (try "string1" "string1")
  186. (try "string1string2" "string1" "string2")
  187. (try "patleftsub1patmidsub2patright" 0)
  188. (try "hi-patleftsub1patmidsub2patright-bye" "hi-" 0 "-bye")
  189. (try "sub1" 1)
  190. (try "hi-sub1-bye" "hi-" 1 "-bye")
  191. (try "hi-sub2-bye" "hi-" 2 "-bye")
  192. (try "contleft" 'pre)
  193. (try "contright" 'post)
  194. (try "contrightcontleft" 'post 'pre)
  195. (try "contrightcontleftcontrightcontleft" 'post 'pre 'post 'pre)
  196. (try "contrightsub2sub1contleft" 'post 2 1 'pre)
  197. (try "foosub1sub1sub1sub1bar" "foo" 1 1 1 1 "bar")))
  198. (with-test-prefix "regexp-substitute/global"
  199. (define (try expected . args)
  200. (with-test-prefix (object->string args)
  201. (apply vary-port regexp-substitute/global expected args)))
  202. (try "hi" "a(x*)b" "ab" "hi")
  203. (try "" "a(x*)b" "ab" 1)
  204. (try "xx" "a(x*)b" "axxb" 1)
  205. (try "xx" "a(x*)b" "_axxb_" 1)
  206. (try "pre" "a(x*)b" "preaxxbpost" 'pre)
  207. (try "post" "a(x*)b" "preaxxbpost" 'post)
  208. (try "string" "x" "string" 'pre "y" 'post)
  209. (try "4" "a(x*)b" "_axxb_" (lambda (m)
  210. (number->string (match:end m 1))))
  211. (try "_aybycyd_" "x+" "_axbxxcxxxd_" 'pre "y" 'post)
  212. ;; This should not go into an infinite loop, just because the regexp
  213. ;; can match the empty string. This test also kind of beats on our
  214. ;; definition of where a null string can match.
  215. (try "y_yaybycydy_y" "x*" "_axbxxcxxxd_" 'pre "y" 'post)
  216. ;; These kind of bother me. The extension from regexp-substitute to
  217. ;; regexp-substitute/global is only natural if your item list
  218. ;; includes both pre and post. If those are required, why bother
  219. ;; to include them at all?
  220. (try "4:7:12:_" "a(x*)b" "_axxbaxbaxxxb_"
  221. (lambda (m) (number->string (match:end m 1))) ":"
  222. 'post)
  223. (try "4:10:19:_:19:10:4" "a(x*)b" "_axxbaxxxxbaxxxxxxxb_"
  224. (lambda (m) (number->string (match:end m 1))) ":"
  225. 'post
  226. ":" (lambda (m) (number->string (match:end m 1))))
  227. ;; Jan Nieuwenhuizen's bug, 2 Sep 1999
  228. (try "" "_" (make-string 500 #\_)
  229. 'post))
  230. (with-test-prefix "nonascii locales"
  231. (pass-if "match structures refer to char offsets"
  232. (with-locale "en_US.utf8"
  233. ;; bug #31650
  234. (equal? (match:substring (string-match ".*" "calçot") 0)
  235. "calçot")))
  236. (pass-if "match structures refer to char offsets, non-ASCII pattern"
  237. (with-locale "en_US.utf8"
  238. ;; bug #31650
  239. (equal? (match:substring (string-match "λ: The Ultimate (.*)"
  240. "λ: The Ultimate GOTO")
  241. 1)
  242. "GOTO"))))