123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111 |
- (library (statistics)
- (export display-statistics
- display-learned-statistic)
- (import (except (rnrs base) vector-for-each)
- (only (guile)
- lambda* λ
- simple-format
- current-output-port
- current-input-port
- exact->inexact
- sort)
- ;; GNU Guile batteries
- (ice-9 exceptions)
- (ice-9 match)
- (ice-9 format)
- ;; custom libraries
- (vocabulary-data)
- (alist-procs)
- ;; custom helper libraries
- ;; SRFIs
- (srfi srfi-1)
- ;; SRFI 43 - vector procs
- (srfi srfi-43)
- ;; SRFI 69 - hash tables
- (srfi srfi-69)
- (vector-procs)))
;; `rest' is the readable counterpart to `first': it returns everything
;; after the head of a non-empty list, i.e. the `cdr' of LST.
(define (rest lst)
  (cdr lst))
- ;; NOTE: Calculating statistics might be a good candidate
- ;; for using fibers, to calculate all of the statistics in
- ;; separate fibers and speed up the calculation.
;; Write a short report to OUTPUT-PORT stating how many entries of
;; VOCABULARY are learned: the absolute count, the total number of
;; entries and the share as a percentage with two decimal places.
;;
;; Arguments:
;;   vocabulary     the vocabulary to examine.
;;   #:input-port   accepted so that all statistic procedures share one
;;                  signature; it is not read by this procedure.
;;   #:output-port  port the report is written to; defaults to the
;;                  current output port.
;;
;; The learned entries are selected with `vocabulary-filter', using the
;; value of each entry's ("metadata" "learned") attribute as predicate
;; result (presumably a boolean -- confirm against the vocabulary data).
(define display-learned-statistic
  (lambda* (vocabulary
            #:key
            (input-port (current-input-port))
            (output-port (current-output-port)))
    "Display statistics about the given VOCABULARY learned attribute."
    (let* ([learned-vocabulary
            (vocabulary-filter
             (λ (entry)
               (get:entry/attribute entry '("metadata" "learned")))
             vocabulary)]
           [num-learned
            (vector-length (get:vocabulary/entries learned-vocabulary))]
           [num-total
            (vector-length (get:vocabulary/entries vocabulary))]
           ;; An exact division by zero raises an error in Scheme, so an
           ;; empty vocabulary must be special-cased: there is no
           ;; meaningful ratio, report 0 percent instead of crashing.
           ;; The arithmetic stays exact (rational) until the final
           ;; conversion, so no rounding happens before formatting.
           [percentage
            (if (zero? num-total)
                0
                (/ (* 100 num-learned) num-total))])
      (simple-format output-port
                     "learned vocabulary:\n ~a of ~a (~a%)\n"
                     num-learned
                     num-total
                     ;; `simple-format' has no float directives, so the
                     ;; percentage is pre-rendered with (ice-9 format).
                     (format #f "~,2f" (exact->inexact percentage))))))
;; Write a "tag counts:" report to OUTPUT-PORT: one line per distinct
;; tag found in the ("metadata" "tags") attribute of the entries of
;; VOCABULARY, together with the number of times that tag occurs.
;; Lines are ordered by ascending count and followed by an empty line.
;;
;; Arguments:
;;   vocabulary     the vocabulary whose entries are scanned.
;;   #:input-port   accepted so that all statistic procedures share one
;;                  signature; it is not read by this procedure.
;;   #:output-port  port the report is written to; defaults to the
;;                  current output port.
(define display-tags-count-statistic
  (lambda* (vocabulary
            #:key
            (input-port (current-input-port))
            (output-port (current-output-port)))
    "Display how often each tag occurs among the entries of VOCABULARY."
    (simple-format output-port "tag counts:\n")
    (let* ([counts (make-hash-table)]
           [entries (get:vocabulary/entries vocabulary)]
           [add-one (λ (n) (+ n 1))]
           ;; The vector iteration hands its callback an index followed
           ;; by the element; the index is not needed for counting.
           [count-tag!
            (λ (_tag-index tag)
              ;; A tag seen for the first time starts from 0.
              (hash-table-update!/default counts tag add-one 0))]
           [count-entry-tags!
            (λ (_entry-index entry)
              (vector-for-each
               count-tag!
               (get:entry/attribute entry '("metadata" "tags"))))]
           [fewer-occurrences?
            (λ (left right)
              (< (alist-item-value left)
                 (alist-item-value right)))])
      (vector-for-each count-entry-tags! entries)
      ;; `for-each' (unlike `map') is guaranteed to visit the list
      ;; elements in order, so the lines come out sorted.
      (for-each
       (λ (tag-and-count)
         (simple-format output-port
                        " ~a: ~a\n"
                        (alist-item-key tag-and-count)
                        (alist-item-value tag-and-count)))
       (sort (hash-table->alist counts) fewer-occurrences?))
      ;; Blank line terminating the report.
      (simple-format output-port "\n"))))
;; Write every available statistic about VOCABULARY to OUTPUT-PORT:
;; first the learned-entries report, then the tag-count report.
;;
;; Arguments:
;;   vocabulary     the vocabulary to report on.
;;   #:input-port   forwarded unchanged to each statistic procedure;
;;                  defaults to the current input port.
;;   #:output-port  forwarded unchanged to each statistic procedure;
;;                  defaults to the current output port.
(define display-statistics
  (lambda* (vocabulary
            #:key
            (input-port (current-input-port))
            (output-port (current-output-port)))
    "Display statistics about the given VOCABULARY."
    ;; `for-each' processes the list in order, so the reports appear in
    ;; the order in which the procedures are listed here.
    (for-each
     (λ (display-one-statistic)
       (display-one-statistic vocabulary
                              #:input-port input-port
                              #:output-port output-port))
     (list display-learned-statistic
           display-tags-count-statistic))))
|