regexdna.scm 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. #| Kawa
  2. The Computer Language Benchmarks Game
  3. http://shootout.alioth.debian.org/
  4. Contributed by Per Bothner
  5. Based on Java 6 server #4 version
  6. contributed by Razii, idea taken from Elliott Hughes and Roger Millington
  7. |#
  8. (import (srfi :69 basic-hash-tables))
  9. (define replacements (alist->hash-table
  10. '(("W" . "(a|t)")
  11. ("Y" . "(c|t)")
  12. ("K" . "(g|t)")
  13. ("M" . "(a|c)")
  14. ("S" . "(c|g)")
  15. ("R" . "(a|g)")
  16. ("B" . "(c|g|t)")
  17. ("D" . "(a|g|t)")
  18. ("V" . "(a|c|g)")
  19. ("H" . "(a|c|t)")
  20. ("N" . "(a|c|g|t)"))))
  21. (define-syntax rewrite
  22. (syntax-rules ()
  23. ((rewrite pattern original replace)
  24. (let* ((matcher (pattern:matcher original))
  25. (destination (java.lang.StringBuffer (original:length))))
  26. (do () ((not (matcher:find)))
  27. (matcher:appendReplacement destination "")
  28. (replace matcher destination))
  29. (matcher:appendTail destination)
  30. (destination:toString)))))
  31. (define variants
  32. (java.lang.String[]
  33. "agggtaaa|tttaccct"
  34. "[cgt]gggtaaa|tttaccc[acg]"
  35. "a[act]ggtaaa|tttacc[agt]t"
  36. "ag[act]gtaaa|tttac[agt]ct"
  37. "agg[act]taaa|ttta[agt]cct"
  38. "aggg[acg]aaa|ttt[cgt]ccct"
  39. "agggt[cgt]aa|tt[acg]accct"
  40. "agggta[cgt]a|t[acg]taccct"
  41. "agggtaa[cgt]|[acg]ttaccct"))
  42. (define (regexdna (in :: java.io.InputStream))
  43. (let* ((r (java.io.InputStreamReader in "ISO-8859-1"))
  44. (sb :: java.lang.StringBuilder
  45. (let ((cbuf (char[] length: 16384))
  46. (sb (java.lang.StringBuilder 5100000)))
  47. (let loop ()
  48. (let ((chars-read (r:read cbuf)))
  49. (cond ((>= chars-read 0)
  50. (sb:append cbuf 0 chars-read)
  51. (loop)))))
  52. sb))
  53. (initial-length (sb:length))
  54. (sequence :: string
  55. (rewrite #/>.*\n|\n/ sb
  56. (lambda (matcher destination) #!void)))
  57. (code-length (sequence:length))
  58. (nvariants variants:length))
  59. (do ((i :: int 0 (+ i 1)))
  60. ((>= i nvariants))
  61. (let* ((count :: int 0)
  62. (variant (variants i))
  63. (m ((java.util.regex.Pattern:compile variant):matcher sequence)))
  64. (do ()
  65. ((not (m:find)))
  66. (set! count (+ count 1)))
  67. (format #t "~a ~d~%" variant count)))
  68. (set! sequence
  69. (rewrite #/[WYKMSRBDVHN]/ sequence
  70. (lambda (matcher destination)
  71. (destination:append (replacements:get(matcher:group 0))))))
  72. (format #t "~%~d~%~d~%~d~%"
  73. initial-length code-length (sequence:length))))
  74. (regexdna java.lang.System:in)