regex-test.scm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491
  1. (test-begin "regex")
  2. (require 'regex)
  3. (require 'pregexp)
  4. ; Based on pregexp-test.scm.
  5. ; Redefined test in terms of test-equal:
  6. ; (test (expression ...) expected-result ...)
  7. ; is translated to
  8. ; (test-equal expected-result (expression ...)) ...
  9. ; Also added test syntax variant:
  10. ; (#(f1 args1 ...) #(f2 args2 ...) shared-args ...) expected-result
  11. ; This is equivalent to:
  12. ; (test-equal expected-result (f1 args1 ... shared-args ...))
  13. ; (test-equal expected-result (f2 args2 ... shared-args ...))
  14. ; It's used to test regex and pregexp versions at once.
  15. ;Copyright (c) 1999-2005, Dorai Sitaram.
  16. ;All rights reserved.
  17. ;Permission to copy, modify, distribute, and use this work or
  18. ;a modified copy of this work, for any purpose, is hereby
  19. ;granted, provided that the copy includes this copyright
  20. ;notice, and in the case of a modified copy, also includes a
  21. ;notice of modification. This work is provided as is, with
  22. ;no warranty of any kind.
  23. (define-syntax (test form)
  24. (syntax-case form ()
  25. ((test) #!void)
  26. ((test expr expected . rest)
  27. (syntax-case (syntax expr) ()
  28. ((#(f1 a1 ...) #(f2 a2 ...) . args)
  29. #`(begin
  30. #,(gnu.lists.PairWithPosition (syntax->datum (syntax expr))
  31. 'test-equal
  32. #`('expected (f1 a1 ... . args)))
  33. #,(gnu.lists.PairWithPosition (syntax->datum (syntax expr))
  34. 'test-equal
  35. #`('expected (f2 a2 ... . args)))
  36. (test . rest)))
  37. (_
  38. #`(begin
  39. #,(gnu.lists.PairWithPosition (syntax->datum (syntax expr))
  40. 'test-equal
  41. #'('expected expr))
  42. (test . rest)))))))
  43. ;last substantial change 2005-04-24
  44. ;last change 2008-04-12
  45. (test
  46. (pregexp "c.r")
  47. (:sub (:or (:seq #\c :any #\r)))
  48. (#(pregexp-match-positions) #(regex-match-positions) "brain" "bird")
  49. #f
  50. (#(pregexp-match-positions "needle")
  51. #(regex-match-positions #/needle/)
  52. "hay needle stack")
  53. ((4 . 10))
  54. (#(pregexp-match-positions) #(regex-match-positions) "needle"
  55. "his hay needle stack -- my hay needle stack -- her hay needle stack"
  56. 24 43)
  57. ((31 . 37))
  58. (#(pregexp-match) #(regex-match) "brain" "bird")
  59. #f
  60. (#(pregexp-match) #(regex-match) "needle" "hay needle stack")
  61. ("needle")
  62. (#(pregexp-split) #(regex-split)
  63. ":" "/bin:/usr/bin:/usr/bin/X11:/usr/local/bin")
  64. ("/bin" "/usr/bin" "/usr/bin/X11" "/usr/local/bin")
  65. (#(pregexp-split) #(regex-split) " " "pea soup")
  66. ("pea" "soup")
  67. (pregexp-split "" "smithereens")
  68. ("s" "m" "i" "t" "h" "e" "r" "e" "e" "n" "s")
  69. (regex-split "" "smithereens")
  70. ("" "s" "m" "i" "t" "h" "e" "r" "e" "e" "n" "s" "") ; NOTE difference
  71. (pregexp-split " +" "split pea soup")
  72. ("split" "pea" "soup")
  73. (regex-split " +" "split pea soup")
  74. ("split" "pea" "soup")
  75. (pregexp-split " *" "split pea soup")
  76. ("s" "p" "l" "i" "t" "p" "e" "a" "s" "o" "u" "p")
  77. (regex-split " *" "split pea soup")
  78. ("" "s" "p" "l" "i" "t" "" "p" "e" "a" "" "s" "o" "u" "p" "") ; NOTE difference
  79. (#(pregexp-replace) #(regex-replace) "te" "liberte" "ty")
  80. "liberty"
  81. (#(pregexp-replace*) #(regex-replace*) "te" "liberte egalite fraternite" "ty")
  82. "liberty egality fratyrnity"
  83. (pregexp-match-positions "^contact" "first contact")
  84. #f
  85. (#(pregexp-match-positions) #(regex-match-positions) "laugh$" "laugh laugh laugh laugh")
  86. ((18 . 23))
  87. (#(pregexp-match-positions) #(regex-match-positions) "yack\\b" "yackety yack")
  88. ((8 . 12))
  89. (#(pregexp-match-positions) #(regex-match-positions) "an\\B" "an analysis")
  90. ((3 . 5))
  91. (#(pregexp-match) #(regex-match) "p.t" "pet")
  92. ("pet")
  93. (#(pregexp-match) #(regex-match) "\\d\\d" "0 dear, 1 have to read catch 22 before 9")
  94. ("22")
  95. (#(pregexp-match "[[:alpha:]_]")
  96. #(regex-match #/[\p{Alpha}_]/)
  97. "--x--")
  98. ("x")
  99. (#(pregexp-match "[[:alpha:]_]")
  100. #(regex-match #/[\p{Alpha}_]/)
  101. "--_--")
  102. ("_")
  103. (#(pregexp-match "[[:alpha:]_]")
  104. #(regex-match #/[\p{Alpha}_]/) "--:--")
  105. #f
  106. (#(pregexp-match "[:alpha:]")
  107. #(regex-match #/\p{Alpha}/) "--a--")
  108. ("a")
  109. (#(pregexp-match "[:alpha:]")
  110. #(regex-match #/\p{Alpha}/) "--_--")
  111. #f
  112. (#(pregexp-match-positions) #(regex-match-positions) "c[ad]*r" "cadaddadddr")
  113. ((0 . 11))
  114. (#(pregexp-match-positions) #(regex-match-positions) "c[ad]*r" "cr")
  115. ((0 . 2))
  116. (#(pregexp-match-positions) #(regex-match-positions) "c[ad]+r" "cadaddadddr")
  117. ((0 . 11))
  118. (#(pregexp-match-positions) #(regex-match-positions) "c[ad]+r" "cr")
  119. #f
  120. (#(pregexp-match-positions) #(regex-match-positions) "c[ad]?r" "cadaddadddr")
  121. #f
  122. (#(pregexp-match-positions) #(regex-match-positions) "c[ad]?r" "cr")
  123. ((0 . 2))
  124. (#(pregexp-match-positions) #(regex-match-positions) "c[ad]?r" "car")
  125. ((0 . 3))
  126. (#(pregexp-match) #(regex-match) "[aeiou]{3}" "vacuous")
  127. ("uou")
  128. (#(pregexp-match) #(regex-match) "[aeiou]{3}" "evolve")
  129. #f
  130. (#(pregexp-match) #(regex-match) "[aeiou]{2,3}" "evolve")
  131. #f
  132. (#(pregexp-match) #(regex-match) "[aeiou]{2,3}" "zeugma")
  133. ("eu")
  134. (#(pregexp-match) #(regex-match) "<.*>" "<tag1> <tag2> <tag3>")
  135. ("<tag1> <tag2> <tag3>")
  136. (#(pregexp-match) #(regex-match) "<.*?>" "<tag1> <tag2> <tag3>")
  137. ("<tag1>")
  138. (#(pregexp-match) #(regex-match) "([a-z]+) ([0-9]+), ([0-9]+)" "jan 1, 1970")
  139. ("jan 1, 1970" "jan" "1" "1970")
  140. (#(pregexp-match) #(regex-match) "(poo )*" "poo poo platter")
  141. ("poo poo " "poo ")
  142. (#(pregexp-match) #(regex-match) "([a-z ]+;)*" "lather; rinse; repeat;")
  143. ("lather; rinse; repeat;" " repeat;")
  144. )
  145. ;match `month year' or `month day, year'.
  146. ;subpattern matches day, if present
  147. (define date-re
  148. #/([a-z]+) +([0-9]+,)? *([0-9]+)/)
  149. (define date-pre
  150. (pregexp "([a-z]+) +([0-9]+,)? *([0-9]+)"))
  151. (test
  152. (#(pregexp-match date-pre) #(regex-match date-re) "jan 1, 1970")
  153. ("jan 1, 1970" "jan" "1," "1970")
  154. (#(pregexp-match date-pre) #(regex-match date-re) "jan 1970")
  155. ("jan 1970" "jan" #f "1970")
  156. (#(pregexp-replace "_(.+?)_"
  157. "the _nina_, the _pinta_, and the _santa maria_"
  158. "*\\1*")
  159. #(regex-replace "_(.+?)_"
  160. "the _nina_, the _pinta_, and the _santa maria_"
  161. "*$1*"))
  162. "the *nina*, the _pinta_, and the _santa maria_"
  163. (#(pregexp-replace* "_(.+?)_"
  164. "the _nina_, the _pinta_, and the _santa maria_"
  165. "*\\1*")
  166. #(regex-replace* "_(.+?)_"
  167. "the _nina_, the _pinta_, and the _santa maria_"
  168. "*$1*"))
  169. "the *nina*, the *pinta*, and the *santa maria*"
  170. (#(pregexp-replace "(\\S+) (\\S+) (\\S+)"
  171. "eat to live"
  172. "\\3 \\2 \\1")
  173. #(regex-replace #/(\S+) (\S+) (\S+)/
  174. "eat to live"
  175. "$3 $2 $1"))
  176. "live to eat"
  177. (#(pregexp-match) #(regex-match) "([a-z]+) and \\1"
  178. "billions and billions")
  179. ("billions and billions" "billions")
  180. (#(pregexp-match) #(regex-match) "([a-z]+) and \\1"
  181. "billions and millions")
  182. #f
  183. (#(pregexp-replace* "(\\S+) \\1"
  184. "now is the the time for all good men to to come to the aid of of the party"
  185. "\\1")
  186. #(regex-replace* #/(\S+) \1/
  187. "now is the the time for all good men to to come to the aid of of the party"
  188. "$1"))
  189. "now is the time for all good men to come to the aid of the party"
  190. (#(pregexp-replace* "(\\d+)\\1"
  191. "123340983242432420980980234"
  192. "{\\1,\\1}")
  193. #(regex-replace* #/(\d+)\1/
  194. "123340983242432420980980234"
  195. "{$1,$1}"))
  196. "12{3,3}40983{24,24}3242{098,098}0234"
  197. (#(pregexp-match) #(regex-match) "^(?:[a-z]*/)*([a-z]+)$"
  198. "/usr/local/bin/mzscheme")
  199. ("/usr/local/bin/mzscheme" "mzscheme")
  200. (#(pregexp-match) #(regex-match) "(?i:hearth)" "HeartH")
  201. ("HeartH")
  202. (regex-match #/hearth/i "HeartH")
  203. ("HeartH")
  204. (#(pregexp-match) #(regex-match) "(?x: a lot)" "alot")
  205. ("alot")
  206. (#(pregexp-match) #(regex-match) "(?x: a \\ lot)" "a lot")
  207. ("a lot")
  208. (pregexp-match "(?x:
  209. a \\ man \\; \\ ; ignore
  210. a \\ plan \\; \\ ; me
  211. a \\ canal ; completely
  212. )"
  213. "a man; a plan; a canal")
  214. ("a man; a plan; a canal")
  215. (#(pregexp-match "(?ix:
  216. a \\ man \\; \\ ; ignore
  217. a \\ plan \\; \\ ; me
  218. a \\ canal ; completely
  219. )")
  220. #(regex-match "(?i:a man; a plan; a canal)")
  221. "A Man; a Plan; a Canal")
  222. ("A Man; a Plan; a Canal")
  223. (#(pregexp-match) #(regex-match) "(?i:the (?-i:TeX)book)"
  224. "The TeXbook")
  225. ("The TeXbook")
  226. (#(pregexp-match) #(regex-match) "f(ee|i|o|um)" "a small, final fee")
  227. ("fi" "i")
  228. (#(pregexp-replace* "([yi])s(e[sdr]?|ing|ation)"
  229. "it is energising to analyse an organisation pulsing with noisy organisms"
  230. "\\1z\\2")
  231. #(regex-replace* "([yi])s(e[sdr]?|ing|ation)"
  232. "it is energising to analyse an organisation pulsing with noisy organisms"
  233. "$1z$2"))
  234. "it is energizing to analyze an organization pulsing with noisy organisms"
  235. (#(pregexp-match) #(regex-match) "f(?:ee|i|o|um)" "fun for all")
  236. ("fo")
  237. (#(pregexp-match) #(regex-match) "call|call-with-current-continuation"
  238. "call-with-current-continuation")
  239. ("call")
  240. (#(pregexp-match) #(regex-match) "call-with-current-continuation|call"
  241. "call-with-current-continuation")
  242. ("call-with-current-continuation")
  243. (#(pregexp-match) #(regex-match) "(?:call|call-with-current-continuation) constrained"
  244. "call-with-current-continuation constrained")
  245. ("call-with-current-continuation constrained")
  246. (#(pregexp-match) #(regex-match) "(?>a+)." "aaaa")
  247. #f
  248. (#(pregexp-match-positions) #(regex-match-positions) "grey(?=hound)"
  249. "i left my grey socks at the greyhound")
  250. ((28 . 32))
  251. (#(pregexp-match-positions) #(regex-match-positions) "grey(?!hound)"
  252. "the gray greyhound ate the grey socks")
  253. ((27 . 31))
  254. (#(pregexp-match-positions) #(regex-match-positions) "(?<=grey)hound"
  255. "the hound in the picture is not a greyhound")
  256. ((38 . 43))
  257. (#(pregexp-match-positions) #(regex-match-positions) "(?<!grey)hound"
  258. "the greyhound in the picture is not a hound")
  259. ((38 . 43))
  260. )
  261. #|
  262. (define n0-255
  263. "(?x:
  264. \\d ; 0 through 9
  265. | \\d\\d ; 00 through 99
  266. | [01]\\d\\d ;000 through 199
  267. | 2[0-4]\\d ;200 through 249
  268. | 25[0-5] ;250 through 255
  269. )")
  270. |#
  271. (define n0-255
  272. "(?:\\d|\\d\\d|[01]\\d\\d|2[0-4]\\d|25[0-5])")
  273. (define ip-re1
  274. (string-append
  275. "^" ;nothing before
  276. n0-255 ;the first n0-255,
  277. "(?x:" ;then the subpattern of
  278. "\\." ;a dot followed by
  279. n0-255 ;an n0-255,
  280. ")" ;which is
  281. "{3}" ;repeated exactly 3 times
  282. "$" ;with nothing following
  283. ))
  284. (test
  285. (#(pregexp-match) #(regex-match) ip-re1
  286. "1.2.3.4")
  287. ("1.2.3.4")
  288. (#(pregexp-match) #(regex-match) ip-re1
  289. "55.155.255.265")
  290. #f
  291. (#(pregexp-match) #(regex-match) ip-re1
  292. "0.00.000.00")
  293. ("0.00.000.00")
  294. )
  295. (define ip-re
  296. (string-append
  297. "(?=[1-9])" ;ensure there's a non-0 digit
  298. ip-re1))
  299. (test
  300. (#(pregexp-match) #(regex-match) ip-re
  301. "1.2.3.4")
  302. ("1.2.3.4")
  303. (#(pregexp-match) #(regex-match) ip-re
  304. "0.0.0.0")
  305. #f
  306. )
  307. (set! ip-re
  308. (string-append
  309. "(?![0.]*$)" ;not just zeros and dots
  310. ;dot is not metachar inside []
  311. ip-re1))
  312. (test
  313. (#(pregexp-match) #(regex-match) ip-re
  314. "1.2.3.4")
  315. ("1.2.3.4")
  316. (#(pregexp-match) #(regex-match) ip-re
  317. "0.0.0.0")
  318. #f
  319. ;misc
  320. (#(pregexp-match) #(regex-match) "a[^a]*b" "glauber")
  321. ("aub")
  322. (#(pregexp-match) #(regex-match) "a([^a]*)b" "glauber")
  323. ("aub" "u")
  324. (#(pregexp-match) #(regex-match) "a([^a]*)b" "ababababab")
  325. ("ab" "")
  326. (#(pregexp-match) #(regex-match) "(?x: s e * k )" "seeeeek")
  327. ("seeeeek")
  328. (#(pregexp-match "(?x: t ;matches t
  329. h ; matches h
  330. e ;;; matches e
  331. \\ ; ; ; matches space
  332. \\; ; matches ;
  333. )")
  334. #(regex-match "(?x: t # matches t
  335. h # matches h
  336. e # matches e
  337. \\ # matches space
  338. \\; # matches ;
  339. )")
  340. "the ;")
  341. ("the ;")
  342. (#(pregexp-replace* "^(.*)$" "foobar" "\\1abc")
  343. #(regex-replace* "^(.*)$" "foobar" "$1abc"))
  344. "foobarabc"
  345. (#(pregexp-replace* "^(.*)$" "foobar" "abc\\1")
  346. #(regex-replace* "^(.*)$" "foobar" "abc$1"))
  347. "abcfoobar"
  348. (pregexp-replace* "(.*)$" "foobar" "abc\\1")
  349. "abcfoobar"
  350. (regex-replace* "(.*)$" "foobar" "abc$1")
  351. "abcfoobarabc" ;; NOTE difference
  352. )
  353. (test
  354. ;PLT bug 6095 from Neil W. Van Dyke
  355. (pregexp "[a-z-]")
  356. (:sub (:or (:seq (:one-of-chars (:char-range #\a #\z) #\-))))
  357. ;
  358. (pregexp "[-a-z]")
  359. (:sub (:or (:seq (:one-of-chars #\- (:char-range #\a #\z)))))
  360. ;PLT bug 6442 from David T. Pierson
  361. (#(pregexp-match-positions) #(regex-match-positions) "(a(b))?c" "abc")
  362. ((0 . 3) (0 . 2) (1 . 2))
  363. ;
  364. (#(pregexp-match-positions) #(regex-match-positions) "(a(b))?c" "c")
  365. ((0 . 1) #f #f)
  366. ;PLT bug 7233 from Edi Weitz
  367. (#(length (pregexp-match "(a)|(b)" "b"))
  368. #(length (regex-match "(a)|(b)" "b")))
  369. 3
  370. ;PLT bug 7232 from Neil Van Dyke
  371. (pregexp "[-a]")
  372. (:sub (:or (:seq (:one-of-chars #\- #\a))))
  373. ;
  374. (pregexp "[a-]")
  375. (:sub (:or (:seq (:one-of-chars #\a #\-))))
  376. )
  377. (test-end)