knucleotide.scm 1.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. ;; The Computer Language Shootout
  2. ;; http://shootout.alioth.debian.org/
  3. ;; Based on mzscheme version,
  4. ;; with some bits based on Java version contributed by James McIlree.
  5. (import (srfi :69 basic-hash-tables))
  6. (import (srfi :95 sorting-and-merging))
  7. (define (all-counts (len :: int) (dna :: java.lang.String))
  8. (let ((table (make-hash-table)))
  9. (let loop ((s :: int (- (string-length dna) len)))
  10. (let* ((key (string->symbol (dna:substring s (+ s len))))
  11. (cnt (hash-table-ref/default table key 0)))
  12. (hash-table-set! table key (+ cnt 1)))
  13. (if (> s 0)
  14. (loop (- s 1))))
  15. table))
  16. (define (write-freqs table)
  17. (let* ((content (hash-table->alist table))
  18. (total (exact->inexact (apply + (map cdr content)))))
  19. (for-each
  20. (lambda (a)
  21. (format #t "~a ~,3f~%"
  22. (car a)
  23. (* 100 (/ (cdr a) total))))
  24. (sort content (lambda (a b) (> (cdr a) (cdr b)))))))
  25. (define (write-one-freq table key)
  26. (let ((cnt (hash-table-ref/default table key 0)))
  27. (format #t "~a\t~a~%" cnt key)))
  28. (define dna
  29. (let ((in :: input-port (current-input-port))
  30. (sb (java.lang.StringBuilder)))
  31. ;; Skip to ">THREE ..."
  32. (do ()
  33. ((let ((line (in:readLine)))
  34. (or (eq? line #!null) (line:startsWith ">THREE")))))
  35. (let loop ()
  36. (let ((line (in:readLine)))
  37. (cond ((not (eq? line #!null))
  38. (sb:append line)
  39. (loop)))))
  40. ((sb:toString):toUpperCase)))
  41. ;; 1-nucleotide counts:
  42. (write-freqs (all-counts 1 dna))
  43. (newline)
  44. ;; 2-nucleotide counts:
  45. (write-freqs (all-counts 2 dna))
  46. (newline)
  47. ;; Specific sequences:
  48. (for-each (lambda (seq)
  49. (write-one-freq (all-counts (string-length seq) dna)
  50. (string->symbol seq)))
  51. '("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))