fasta.scm 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. ;;; The Computer Language Benchmarks Game
  2. ;;; http://shootout.alioth.debian.org/
  3. ;;;
  4. ;;; Kawa version by Per Bothner inspired by a combination of
  5. ;;; (1) PLT version contributed by Matthew Flatt
  6. ;;; derived from the Chicken variant by Anthony Borla
  7. ;;; (2) Java version "modified by Mehmet D. AKIN"
  8. (define ALU :: byte[]
  9. ("GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
  10. GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
  11. CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
  12. ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
  13. GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
  14. AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
  15. AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA":getBytes "UTF-8"))
  16. (define IUB :: vector
  17. #((#\a . 0.27) (#\c . 0.12) (#\g . 0.12) (#\t . 0.27) (#\B . 0.02)
  18. (#\D . 0.02) (#\H . 0.02) (#\K . 0.02) (#\M . 0.02) (#\N . 0.02)
  19. (#\R . 0.02) (#\S . 0.02) (#\V . 0.02) (#\W . 0.02) (#\Y . 0.02)))
  20. (define HomoSapiens :: vector
  21. #((#\a . 0.3029549426680) (#\c . 0.1979883004921)
  22. (#\g . 0.1975473066391) (#\t . 0.3015094502008)))
  23. (define-constant buffer-size :: int 2048)
  24. (define bbuffer :: byte[] (byte[] length: buffer-size))
  25. (define-constant line-size :: int 60)
  26. (define (make-cumulative-table (frequency-table :: vector)) :: double[]
  27. (let* ((cp :: double 0.0)
  28. (n :: int (vector-length frequency-table))
  29. (cumulative (double[] length: n)))
  30. (do ((i :: int 0 (+ i 1)))
  31. ((>= i n)
  32. cumulative)
  33. (let ((x :: pair (frequency-table i)))
  34. (set! cp (+ cp (cdr x)))
  35. (set! (cumulative i) cp)))))
  36. (define-private last :: int 42)
  37. (define (random-next (max :: double)) :: double
  38. (let* ((im :: int 139968))
  39. (set! last (remainder (+ 29573 (* last 3877)) 139968))
  40. (/ (* max last) im)))
  41. (define (select-random (frequency-table :: vector) (cumulative-table :: double[])) :: int
  42. (let ((rvalue (random-next 1.0)))
  43. (do ((i :: int 0 (+ i 1)))
  44. ((<= rvalue (cumulative-table i))
  45. (char->integer (car (frequency-table i)))))))
  46. ;; -------------
  47. (define-syntax generate-fasta
  48. (syntax-rules ()
  49. ((generate-fasta id desc n_ line-length out action)
  50. (let ((n :: int n_)
  51. (index :: int 0)
  52. (NL (char->integer #\newline)))
  53. (out:write (((string-append ">" id " " desc "\n"):toString):getBytes))
  54. (do ()
  55. ((<= n 0))
  56. (let ((m :: int (if (< n line-length) n line-length))
  57. (avail :: int (- buffer-size index)))
  58. (cond ((< avail m)
  59. (out:write bbuffer 0 index)
  60. (set! index 0)))
  61. (do ((i :: int 0 (+ i 1)))
  62. ((>= i m))
  63. (set! (bbuffer index) action)
  64. (set! index (+ index 1)))
  65. (set! (bbuffer index) NL)
  66. (set! index (+ index 1))
  67. (set! n (- n line-length))))
  68. (if (> index 0)
  69. (out:write bbuffer 0 index))))))
  70. (define (repeat-fasta (id :: string) (desc :: string) (n :: int)
  71. (ALU :: byte[])
  72. (out :: java.io.OutputStream)) :: void
  73. (let ((k :: int 0)
  74. (kn :: int ALU:length))
  75. (generate-fasta id desc n line-size out
  76. (begin
  77. (if (= k kn) (set! k 0))
  78. (let ((bval (ALU k)))
  79. (set! k (+ k 1))
  80. bval)))))
  81. (define (random-fasta (id :: string) (desc :: string) (n :: int)
  82. (frequency-table :: vector)
  83. (out :: java.io.OutputStream)) :: void
  84. (let ((cumulative :: double[] (make-cumulative-table frequency-table)))
  85. (generate-fasta id desc n line-size out
  86. (select-random frequency-table cumulative))))
  87. (let* ((args (cdr (command-line)))
  88. (n :: int (string->number (args 0)))
  89. (out :: java.io.OutputStream java.lang.System:out))
  90. (repeat-fasta "ONE" "Homo sapiens alu" (* n 2) ALU out)
  91. (random-fasta "TWO" "IUB ambiguity codes" (* n 3) IUB out)
  92. (random-fasta "THREE" "Homo sapiens frequency" (* n 5) HomoSapiens out))