knucleotide-2.scm 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. ;; The Computer Language Shootout
  2. ;; http://shootout.alioth.debian.org/
  3. ;; This is a Kawa implementation of the knucelotide benchmark.
  4. ;; This is is fairly low-level - it by-passes the Scheme APIs
  5. ;; to use pure Java libraries. It does make use of
  6. ;; gnu.kawa.util.AbstractHashTable, but it sort-of cheats by getting
  7. ;; down into implementation internals.
  8. ;;
  9. ;; Original Kawa version based on mzscheme version,
  10. ;; with some bits based on Java version contributed by James McIlree.
  11. (define-simple-class StrIntNode (java.util.Map$Entry)
  12. (next ::StrIntNode)
  13. (hash :: int)
  14. (key ::java.lang.String)
  15. (count ::int)
  16. ((getKey) key)
  17. ((getValue) ::object count))
  18. (define (make-node key::string hash::int count::int) ::StrIntNode
  19. (let ((n (StrIntNode)))
  20. (set! n:key key)
  21. (set! n:hash hash)
  22. (set! n:count count)
  23. n))
  24. (define-simple-class StrIntHashTable (gnu.kawa.util.AbstractHashTable)
  25. ((getEntryHashCode entry) (as StrIntNode entry):hash)
  26. ((getEntryNext entry) (as StrIntNode entry):next)
  27. ((setEntryNext entry next) (set! (as StrIntNode entry):next next))
  28. ((allocEntries n) (StrIntNode[] size: n))
  29. ((makeEntry key hash value) (make-node key hash value))
  30. ;; FIXME this code should be made into library code.
  31. ((toNodeArray) ::StrIntNode[]
  32. (let* ((arr ::StrIntNode[] table)
  33. (length arr:length)
  34. (n ::int ((this):size))
  35. (result (StrIntNode[] length: n))
  36. (j ::int 0))
  37. (do ((i ::int (- length 1) (- i 1)))
  38. ((< i 0) result)
  39. (do ((node ::StrIntNode (table i)
  40. node:next))
  41. ((eq? node #!null) #!void)
  42. (set! (result j) node)
  43. (set! j (+ j 1)))))))
  44. (define (all-counts (len :: int) (dna :: java.lang.String))
  45. (let ((table (StrIntHashTable)))
  46. (let loop ((s :: int (- (string-length dna) len)))
  47. (let* ((key (dna:substring s (+ s len)))
  48. (node ::StrIntNode (table:getNode key)))
  49. (if (eq? node #!null)
  50. ;; Non-optimal - requires recalculating hashCode.
  51. (table:put key (key:hashCode) 1)
  52. (set! node:count (+ node:count 1))))
  53. (if (> s 0)
  54. (loop (- s 1))))
  55. table))
  56. (define node-comparator ::java.util.Comparator
  57. (object (java.util.Comparator)
  58. ((compare o1 o2) ::int
  59. (let ((v1 ::int (as StrIntNode o1):count)
  60. (v2 ::int (as StrIntNode o2):count))
  61. (cond ((> v1 v2) -1)
  62. ((< v1 v2) 1)
  63. (else 0))))
  64. ((equals o)::boolean (eq? o (this)))))
  65. (define (write-freqs table::StrIntHashTable) ::void
  66. (let* ((content (table:toNodeArray))
  67. (size content:length)
  68. (total ::double
  69. (let ((sum ::int 0))
  70. (do ((i ::int 0 (+ i 1)))
  71. ((>= i size) sum)
  72. (set! sum (+ sum (content i):count)))
  73. sum)))
  74. (java.util.Arrays:sort content node-comparator)
  75. (do ((i ::int 0 (+ i 1)))
  76. ((>= i size))
  77. (let ((a (content i)))
  78. (format #t "~a ~,3f~%"
  79. a:key
  80. (* 100 (/ (as double a:count) total)))))))
  81. (define (write-one-freq table::StrIntHashTable key::string)
  82. (let* ((node ::StrIntNode (table:getNode key))
  83. (cnt (if (eq? node #!null) 0 node:count)))
  84. (format #t "~a\t~a~%" cnt key)))
  85. (define dna
  86. (let ((in :: input-port (current-input-port))
  87. (sb (java.lang.StringBuilder)))
  88. ;; Skip to ">THREE ..."
  89. (do ()
  90. ((let ((line (in:readLine)))
  91. (or (eq? line #!null) (line:startsWith ">THREE")))))
  92. (let loop ()
  93. (let ((line (in:readLine)))
  94. (cond (line
  95. (sb:append line)
  96. (loop)))))
  97. ((sb:toString):toUpperCase)))
  98. ;; 1-nucleotide counts:
  99. (write-freqs (all-counts 1 dna))
  100. (newline)
  101. ;; 2-nucleotide counts:
  102. (write-freqs (all-counts 2 dna))
  103. (newline)
  104. ;; Specific sequences:
  105. (for-each (lambda (seq)
  106. (write-one-freq (all-counts (string-length seq) dna)
  107. seq))
  108. '("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))