123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457 |
- ;;; Hoot hashtables
- ;;; Copyright (C) 2023, 2024 David Thompson <dave@spritely.institute>
- ;;;
- ;;; Licensed under the Apache License, Version 2.0 (the "License");
- ;;; you may not use this file except in compliance with the License.
- ;;; You may obtain a copy of the License at
- ;;;
- ;;; http://www.apache.org/licenses/LICENSE-2.0
- ;;;
- ;;; Unless required by applicable law or agreed to in writing, software
- ;;; distributed under the License is distributed on an "AS IS" BASIS,
- ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ;;; See the License for the specific language governing permissions and
- ;;; limitations under the License.
- ;;; Commentary:
- ;;;
- ;;; R6RS-inspired hashtables.
- ;;;
- ;;; Code:
- (library (hoot hashtables)
- (export hashq
- hashv
- hash
- make-hashtable
- make-eq-hashtable
- make-eqv-hashtable
- hashtable?
- hashtable-hash
- hashtable-equiv
- hashtable-size
- hashtable-ref
- hashtable-set!
- hashtable-delete!
- hashtable-clear!
- hashtable-contains?
- hashtable-copy
- hashtable-keys
- hashtable-values
- hashtable-for-each
- hashtable-fold
- make-weak-key-hashtable
- weak-key-hashtable?
- weak-key-hashtable-ref
- weak-key-hashtable-set!
- weak-key-hashtable-delete!)
- (import (only (hoot primitives)
- %struct-ref %struct-vtable
- guile:hashq guile:hashv guile:hash)
- (hoot pairs)
- (hoot numbers)
- (hoot bitwise)
- (hoot bitvectors)
- (hoot bytevectors)
- (hoot eq)
- (hoot equal)
- (hoot inline-wasm)
- (hoot procedures)
- (hoot values)
- (hoot vectors)
- (hoot lists)
- (hoot records)
- (hoot strings)
- (hoot syntax)
- (hoot write)
- (hoot match)
- (hoot errors)
- (hoot cond-expand))
- (cond-expand
- (guile-vm
- (define (hashq key size) (guile:hashq key size))
- (define (hashv key size) (guile:hashv key size))
- (define (hash key size) (guile:hash key size)))
- (hoot
- (define (string-hash str)
- (%inline-wasm
- '(func (param $str (ref eq)) (result i64)
- (i64.extend_i32_u
- (call $string-hash
- (struct.get $string $str
- (ref.cast $string (local.get $str))))))
- str))
- (define (%hashq key)
- (%inline-wasm
- '(func (param $key (ref eq)) (result i64)
- (i64.extend_i32_u
- (call $hashq (local.get $key))))
- key))
- (define (%hashv key)
- (if (number? key)
- ;; Use hashq for integers, otherwise convert to a string and
- ;; hash that.
- (if (integer? key)
- (if (exact? key)
- (%hashq key)
- (%hashq (exact key)))
- (string-hash (number->string key)))
- (%hashq key)))
- (define (%hash key)
- ;; Simple, non-commutative hash code combiner.
- (define (combine-hashes h1 h2)
- (logxor (ash h1 5) h2))
- ;; For hashing records:
- (define (assq-ref alist k)
- (and (pair? alist)
- (if (eq? (caar alist) k)
- (cdar alist)
- (assq-ref (cdr alist) k))))
- (define (record-nfields record)
- (%struct-ref (%struct-vtable record) 0))
- (define (record-properties record)
- (%struct-ref (%struct-vtable record) 4))
- (define (record-opaque? record)
- (assq-ref (record-properties record) 'opaque))
- ;; This recursive hashing algorithm with effort limit is inspired
- ;; by Chez Scheme.
- (define (hash key k)
- (let ((k (- k 1)))
- (cond
- ((<= k 0) ; out of hash juice :(
- (values (%hashv key) 0))
- ((string? key)
- (values (string-hash key) k))
- ((pair? key)
- (let ((k/2 (ash (+ k 1) -1)))
- (call-with-values (lambda () (hash (car key) k/2))
- (lambda (h1 k*)
- (call-with-values (lambda () (hash (cdr key) (+ (- k k/2) k*)))
- (lambda (h2 k)
- (values (combine-hashes h1 h2) k)))))))
- ((vector? key)
- (let ((seed #xbeadcafe))
- (let lp ((i 0) (h seed) (k k))
- (if (and (< i (vector-length key)) (> k 0))
- (let ((k/2 (ash (+ k 1) -1)))
- (call-with-values (lambda () (hash (vector-ref key i) k/2))
- (lambda (h* k*)
- (lp (+ i 1) (combine-hashes h h*) (+ (- k k/2) k*)))))
- (values h k)))))
- ((bytevector? key)
- (values (%inline-wasm
- '(func (param $bv (ref eq)) (result i64)
- (i64.extend_i32_u
- (call $hash-bytevector
- (ref.cast $bytevector (local.get $bv)))))
- key)
- k))
- ((bitvector? key)
- (values (%inline-wasm
- '(func (param $bv (ref eq)) (result i64)
- (i64.extend_i32_u
- (call $hash-bitvector
- (ref.cast $bitvector (local.get $bv)))))
- key)
- k))
- ((record? key)
- (if (record-opaque? key)
- (values (%hashq key) k)
- (let ((nfields (record-nfields key))
- (seed #xfacefeed))
- (let lp ((i 0) (h seed) (k k))
- (if (and (< i nfields) (> k 0))
- (let ((k/2 (ash k -1)))
- (call-with-values (lambda ()
- (hash (%struct-ref key i) k/2))
- (lambda (h* k*)
- (lp (+ i 1) (combine-hashes h h*) (+ (- k k/2) k*)))))
- (values h k))))))
- (else
- (values (%hashv key) k)))))
- (call-with-values (lambda () (hash key 64))
- (lambda (hash-code k)
- hash-code)))
- (define max-hash-size (1- (ash 1 32)))
- (define (hashq key size)
- (check-size size max-hash-size 'hashq)
- (modulo (%hashq key) size))
- (define (hashv key size)
- (check-size size max-hash-size 'hashv)
- (modulo (%hashv key) size))
- (define (hash key size)
- (check-size size max-hash-size 'hash)
- (modulo (%hash key) size))))
- ;; Numbers taken from https://planetmath.org/goodhashtableprimes
- (define %bucket-sizes
- #(53 97 193 389 769 1543 3079 6151 12289 24593 98317 196613 393241 786433 1572869))
- (define %min-buckets 53)
- (define (lower-bound k)
- (quotient k 4))
- (define (upper-bound k)
- (quotient (* k 9) 10))
- (define (optimal-buckets k)
- (let ((last (- (vector-length %bucket-sizes) 1)))
- (let lp ((idx 0))
- (if (= idx last)
- (vector-ref %bucket-sizes last)
- (let ((size (vector-ref %bucket-sizes idx)))
- (if (> k (upper-bound size))
- (lp (+ idx 1))
- size))))))
- (define-record-type <hashtable>
- #:printer (lambda (table port)
- (display "#<hashtable size: " port)
- (display (hashtable-size table) port)
- (display ">" port))
- (%make-hashtable hash equiv size buckets lower upper)
- hashtable?
- (hash hashtable-hash)
- (equiv hashtable-equiv)
- (size hashtable-size set-hashtable-size!)
- (buckets hashtable-buckets set-hashtable-buckets!)
- ;; Lower and upper bounds for growing/shrinking
- (lower hashtable-lower set-hashtable-lower!)
- (upper hashtable-upper set-hashtable-upper!))
- (define* (make-hashtable #:optional (hash hash) (equiv equal?))
- "Return a new, empty hashtable that uses the hash procedure @var{hash}
- and equivalence procedure @var{equiv}."
- (%make-hashtable hash equiv 0 (make-vector %min-buckets '())
- 0 (upper-bound %min-buckets)))
- (define (make-eq-hashtable)
- "Return a new, empty hashtable that uses @code{eq?} as the equivalence
- function and hashes keys accordingly."
- (make-hashtable hashq eq?))
- (define (make-eqv-hashtable)
- "Return a new, empty hashtable that uses @code{eqv?} as the equivalence
- function and hashes keys accordingly."
- (make-hashtable hashv eqv?))
- (define* (hashtable-ref table key #:optional default)
- "Return the value associated with @var{key} in @var{table}, or
- @var{default} if there is no such association."
- (let ((hash (hashtable-hash table))
- (equiv? (hashtable-equiv table))
- (buckets (hashtable-buckets table)))
- (let lp ((chain (vector-ref buckets (hash key (vector-length buckets)))))
- (match chain
- (() default)
- (((other-key . val) . rest)
- (if (equiv? key other-key)
- val
- (lp rest)))))))
- (define (hashtable-resize! table k)
- (let ((old (hashtable-buckets table))
- (new (make-vector k '()))
- (hash (hashtable-hash table)))
- (set-hashtable-lower! table (if (eq? k %min-buckets) 0 (lower-bound k)))
- (set-hashtable-upper! table (upper-bound k))
- (set-hashtable-buckets! table new)
- ;; Rehash all key/value pairs.
- (do ((idx 0 (+ idx 1)))
- ((= idx (vector-length old)))
- (let lp ((chain (vector-ref old idx)))
- (match chain
- (() (values))
- (((and link (key . _)) . rest)
- (let ((new-idx (hash key k)))
- (vector-set! new new-idx (cons link (vector-ref new new-idx)))
- (lp rest))))))))
- (define (hashtable-resize-maybe! table)
- (let ((size (hashtable-size table))
- (lower (hashtable-lower table))
- (upper (hashtable-upper table)))
- (when (or (< size lower) (> size upper))
- (hashtable-resize! table (optimal-buckets size)))))
- (define (hashtable-set! table key val)
- "Associate @{val} with @var{key} in @var{table}, potentially
- overwriting any previous association with @var{key}."
- (let* ((hash (hashtable-hash table))
- (equiv? (hashtable-equiv table))
- (size (hashtable-size table))
- (buckets (hashtable-buckets table))
- (idx (hash key (vector-length buckets)))
- (chain (vector-ref buckets idx)))
- (let lp ((chain* chain))
- (match chain*
- (()
- (vector-set! buckets idx (cons (cons key val) chain))
- (set-hashtable-size! table (+ size 1))
- (hashtable-resize-maybe! table))
- (((and link (other-key . _)) . rest)
- (if (equiv? key other-key)
- (set-cdr! link val)
- (lp rest))))))
- (values))
- (define (hashtable-delete! table key)
- "Remove the association with @var{key} in @var{table}, if one exists."
- (let* ((hash (hashtable-hash table))
- (equiv? (hashtable-equiv table))
- (size (hashtable-size table))
- (buckets (hashtable-buckets table))
- (idx (hash key (vector-length buckets))))
- (vector-set! buckets idx
- (let lp ((chain (vector-ref buckets idx)))
- (match chain
- (() '())
- (((and link (other-key . _)) . rest)
- (if (equiv? key other-key)
- (begin
- (set-hashtable-size! table (- size 1))
- rest)
- (cons link (lp rest)))))))
- (hashtable-resize-maybe! table))
- (values))
- (define* (hashtable-clear! table)
- "Remove all items from @var{table}."
- (vector-fill! (hashtable-buckets table) '())
- (set-hashtable-size! table 0)
- (values))
- (define (hashtable-contains? table key)
- "Return #t if @var{key} has an associated value in @var{table}."
- (let ((hash (hashtable-hash table))
- (equiv? (hashtable-equiv table))
- (buckets (hashtable-buckets table)))
- (let lp ((chain (vector-ref buckets (hash key (vector-length buckets)))))
- (match chain
- (() #f)
- (((other-key . _) . rest)
- (or (equiv? key other-key) (lp rest)))))))
- (define* (hashtable-copy table)
- "Return a copy of @var{table}."
- (let* ((buckets (hashtable-buckets table))
- (k (vector-length buckets))
- (buckets* (make-vector k))
- (table* (%make-hashtable (hashtable-hash table)
- (hashtable-equiv table)
- (hashtable-size table)
- buckets*
- (hashtable-lower table)
- (hashtable-upper table))))
- (do ((i 0 (+ i 1)))
- ((= i k))
- (vector-set! buckets* i
- (map (lambda (link)
- (cons (car link) (cdr link)))
- (vector-ref buckets i))))
- table*))
- (define (hashtable-keys table)
- "Return a list of keys in @var{table}."
- (hashtable-fold (lambda (key val result)
- (cons key result))
- '() table))
- (define (hashtable-values table)
- "Return a list of values in @var{table}."
- (hashtable-fold (lambda (key val result)
- (cons val result))
- '() table))
- (define (hashtable-for-each proc table)
- "Apply @var{proc} to each key/value association in @var{table}.
- Each call is of the form @code{(proc key value)}."
- (let ((buckets (hashtable-buckets table)))
- (do ((idx 0 (+ idx 1)))
- ((= idx (vector-length buckets)))
- (let lp ((chain (vector-ref buckets idx)))
- (match chain
- (() (values))
- (((key . val) . rest)
- (proc key val)
- (lp rest)))))))
- (define (hashtable-fold proc init table)
- "Accumulate a result by applying @var{proc} with each key/value
- association in @var{table} and the result of the previous @var{proc}
- call. Each call is of the form @code{(proc key value prev)}. For the
- first call, @code{prev} is the initial value @var{init}."
- (let ((buckets (hashtable-buckets table)))
- (let bucket-lp ((idx 0) (result init))
- (if (< idx (vector-length buckets))
- (bucket-lp (+ idx 1)
- (let chain-lp ((chain (vector-ref buckets idx))
- (result result))
- (match chain
- (() result)
- (((key . val) . rest)
- (chain-lp rest (proc key val result))))))
- result))))
- ;; Weak key hashtables
- (define (make-weak-key-hashtable)
- (%inline-wasm
- '(func (result (ref eq))
- (struct.new $weak-table
- (i32.const 0)
- (call $make-weak-map)))))
- (define (weak-key-hashtable? obj)
- (%inline-wasm
- '(func (param $obj (ref eq)) (result (ref eq))
- (if (ref eq)
- (ref.test $weak-table (local.get $obj))
- (then (ref.i31 (i32.const 17)))
- (else (ref.i31 (i32.const 1)))))
- obj))
- (define* (weak-key-hashtable-ref table key #:optional default)
- (check-type table weak-key-hashtable? 'weak-key-hashtable-ref)
- (%inline-wasm
- '(func (param $table (ref eq)) (param $key (ref eq))
- (param $default (ref eq)) (result (ref eq))
- (call $weak-map-get
- (struct.get $weak-table $val
- (ref.cast $weak-table (local.get $table)))
- (local.get $key)
- (local.get $default)))
- table key default))
- (define (weak-key-hashtable-set! table key value)
- (check-type table weak-key-hashtable? 'weak-key-hashtable-set!)
- (%inline-wasm
- '(func (param $table (ref eq)) (param $key (ref eq)) (param $val (ref eq))
- (call $weak-map-set
- (struct.get $weak-table $val
- (ref.cast $weak-table (local.get $table)))
- (local.get $key)
- (local.get $val)))
- table key value))
- (define (weak-key-hashtable-delete! table key)
- (check-type table weak-key-hashtable? 'weak-key-hashtable-delete!)
- (%inline-wasm
- '(func (param $table (ref eq)) (param $key (ref eq))
- (call $weak-map-delete
- (struct.get $weak-table $val
- (ref.cast $weak-table (local.get $table)))
- (local.get $key))
- (drop))
- table key)))
|