hashtables.scm 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457
  1. ;;; Hoot hashtables
  2. ;;; Copyright (C) 2023, 2024 David Thompson <dave@spritely.institute>
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; R6RS-inspired hashtables.
  18. ;;;
  19. ;;; Code:
  20. (library (hoot hashtables)
  21. (export hashq
  22. hashv
  23. hash
  24. make-hashtable
  25. make-eq-hashtable
  26. make-eqv-hashtable
  27. hashtable?
  28. hashtable-hash
  29. hashtable-equiv
  30. hashtable-size
  31. hashtable-ref
  32. hashtable-set!
  33. hashtable-delete!
  34. hashtable-clear!
  35. hashtable-contains?
  36. hashtable-copy
  37. hashtable-keys
  38. hashtable-values
  39. hashtable-for-each
  40. hashtable-fold
  41. make-weak-key-hashtable
  42. weak-key-hashtable?
  43. weak-key-hashtable-ref
  44. weak-key-hashtable-set!
  45. weak-key-hashtable-delete!)
  46. (import (only (hoot primitives)
  47. %struct-ref %struct-vtable
  48. guile:hashq guile:hashv guile:hash)
  49. (hoot pairs)
  50. (hoot numbers)
  51. (hoot bitwise)
  52. (hoot bitvectors)
  53. (hoot bytevectors)
  54. (hoot eq)
  55. (hoot equal)
  56. (hoot inline-wasm)
  57. (hoot procedures)
  58. (hoot values)
  59. (hoot vectors)
  60. (hoot lists)
  61. (hoot records)
  62. (hoot strings)
  63. (hoot syntax)
  64. (hoot write)
  65. (hoot match)
  66. (hoot errors)
  67. (hoot cond-expand))
  68. (cond-expand
  69. (guile-vm
  70. (define (hashq key size) (guile:hashq key size))
  71. (define (hashv key size) (guile:hashv key size))
  72. (define (hash key size) (guile:hash key size)))
  73. (hoot
  74. (define (string-hash str)
  75. (%inline-wasm
  76. '(func (param $str (ref eq)) (result i64)
  77. (i64.extend_i32_u
  78. (call $string-hash
  79. (struct.get $string $str
  80. (ref.cast $string (local.get $str))))))
  81. str))
  82. (define (%hashq key)
  83. (%inline-wasm
  84. '(func (param $key (ref eq)) (result i64)
  85. (i64.extend_i32_u
  86. (call $hashq (local.get $key))))
  87. key))
  88. (define (%hashv key)
  89. (if (number? key)
  90. ;; Use hashq for integers, otherwise convert to a string and
  91. ;; hash that.
  92. (if (integer? key)
  93. (if (exact? key)
  94. (%hashq key)
  95. (%hashq (exact key)))
  96. (string-hash (number->string key)))
  97. (%hashq key)))
  98. (define (%hash key)
  99. ;; Simple, non-commutative hash code combiner.
  100. (define (combine-hashes h1 h2)
  101. (logxor (ash h1 5) h2))
  102. ;; For hashing records:
  103. (define (assq-ref alist k)
  104. (and (pair? alist)
  105. (if (eq? (caar alist) k)
  106. (cdar alist)
  107. (assq-ref (cdr alist) k))))
  108. (define (record-nfields record)
  109. (%struct-ref (%struct-vtable record) 0))
  110. (define (record-properties record)
  111. (%struct-ref (%struct-vtable record) 4))
  112. (define (record-opaque? record)
  113. (assq-ref (record-properties record) 'opaque))
  114. ;; This recursive hashing algorithm with effort limit is inspired
  115. ;; by Chez Scheme.
  116. (define (hash key k)
  117. (let ((k (- k 1)))
  118. (cond
  119. ((<= k 0) ; out of hash juice :(
  120. (values (%hashv key) 0))
  121. ((string? key)
  122. (values (string-hash key) k))
  123. ((pair? key)
  124. (let ((k/2 (ash (+ k 1) -1)))
  125. (call-with-values (lambda () (hash (car key) k/2))
  126. (lambda (h1 k*)
  127. (call-with-values (lambda () (hash (cdr key) (+ (- k k/2) k*)))
  128. (lambda (h2 k)
  129. (values (combine-hashes h1 h2) k)))))))
  130. ((vector? key)
  131. (let ((seed #xbeadcafe))
  132. (let lp ((i 0) (h seed) (k k))
  133. (if (and (< i (vector-length key)) (> k 0))
  134. (let ((k/2 (ash (+ k 1) -1)))
  135. (call-with-values (lambda () (hash (vector-ref key i) k/2))
  136. (lambda (h* k*)
  137. (lp (+ i 1) (combine-hashes h h*) (+ (- k k/2) k*)))))
  138. (values h k)))))
  139. ((bytevector? key)
  140. (values (%inline-wasm
  141. '(func (param $bv (ref eq)) (result i64)
  142. (i64.extend_i32_u
  143. (call $hash-bytevector
  144. (ref.cast $bytevector (local.get $bv)))))
  145. key)
  146. k))
  147. ((bitvector? key)
  148. (values (%inline-wasm
  149. '(func (param $bv (ref eq)) (result i64)
  150. (i64.extend_i32_u
  151. (call $hash-bitvector
  152. (ref.cast $bitvector (local.get $bv)))))
  153. key)
  154. k))
  155. ((record? key)
  156. (if (record-opaque? key)
  157. (values (%hashq key) k)
  158. (let ((nfields (record-nfields key))
  159. (seed #xfacefeed))
  160. (let lp ((i 0) (h seed) (k k))
  161. (if (and (< i nfields) (> k 0))
  162. (let ((k/2 (ash k -1)))
  163. (call-with-values (lambda ()
  164. (hash (%struct-ref key i) k/2))
  165. (lambda (h* k*)
  166. (lp (+ i 1) (combine-hashes h h*) (+ (- k k/2) k*)))))
  167. (values h k))))))
  168. (else
  169. (values (%hashv key) k)))))
  170. (call-with-values (lambda () (hash key 64))
  171. (lambda (hash-code k)
  172. hash-code)))
  173. (define max-hash-size (1- (ash 1 32)))
  174. (define (hashq key size)
  175. (check-size size max-hash-size 'hashq)
  176. (modulo (%hashq key) size))
  177. (define (hashv key size)
  178. (check-size size max-hash-size 'hashv)
  179. (modulo (%hashv key) size))
  180. (define (hash key size)
  181. (check-size size max-hash-size 'hash)
  182. (modulo (%hash key) size))))
  183. ;; Numbers taken from https://planetmath.org/goodhashtableprimes
  184. (define %bucket-sizes
  185. #(53 97 193 389 769 1543 3079 6151 12289 24593 98317 196613 393241 786433 1572869))
  186. (define %min-buckets 53)
  187. (define (lower-bound k)
  188. (quotient k 4))
  189. (define (upper-bound k)
  190. (quotient (* k 9) 10))
  191. (define (optimal-buckets k)
  192. (let ((last (- (vector-length %bucket-sizes) 1)))
  193. (let lp ((idx 0))
  194. (if (= idx last)
  195. (vector-ref %bucket-sizes last)
  196. (let ((size (vector-ref %bucket-sizes idx)))
  197. (if (> k (upper-bound size))
  198. (lp (+ idx 1))
  199. size))))))
  200. (define-record-type <hashtable>
  201. #:printer (lambda (table port)
  202. (display "#<hashtable size: " port)
  203. (display (hashtable-size table) port)
  204. (display ">" port))
  205. (%make-hashtable hash equiv size buckets lower upper)
  206. hashtable?
  207. (hash hashtable-hash)
  208. (equiv hashtable-equiv)
  209. (size hashtable-size set-hashtable-size!)
  210. (buckets hashtable-buckets set-hashtable-buckets!)
  211. ;; Lower and upper bounds for growing/shrinking
  212. (lower hashtable-lower set-hashtable-lower!)
  213. (upper hashtable-upper set-hashtable-upper!))
  214. (define* (make-hashtable #:optional (hash hash) (equiv equal?))
  215. "Return a new, empty hashtable that uses the hash procedure @var{hash}
  216. and equivalence procedure @var{equiv}."
  217. (%make-hashtable hash equiv 0 (make-vector %min-buckets '())
  218. 0 (upper-bound %min-buckets)))
  219. (define (make-eq-hashtable)
  220. "Return a new, empty hashtable that uses @code{eq?} as the equivalence
  221. function and hashes keys accordingly."
  222. (make-hashtable hashq eq?))
  223. (define (make-eqv-hashtable)
  224. "Return a new, empty hashtable that uses @code{eqv?} as the equivalence
  225. function and hashes keys accordingly."
  226. (make-hashtable hashv eqv?))
  227. (define* (hashtable-ref table key #:optional default)
  228. "Return the value associated with @var{key} in @var{table}, or
  229. @var{default} if there is no such association."
  230. (let ((hash (hashtable-hash table))
  231. (equiv? (hashtable-equiv table))
  232. (buckets (hashtable-buckets table)))
  233. (let lp ((chain (vector-ref buckets (hash key (vector-length buckets)))))
  234. (match chain
  235. (() default)
  236. (((other-key . val) . rest)
  237. (if (equiv? key other-key)
  238. val
  239. (lp rest)))))))
  240. (define (hashtable-resize! table k)
  241. (let ((old (hashtable-buckets table))
  242. (new (make-vector k '()))
  243. (hash (hashtable-hash table)))
  244. (set-hashtable-lower! table (if (eq? k %min-buckets) 0 (lower-bound k)))
  245. (set-hashtable-upper! table (upper-bound k))
  246. (set-hashtable-buckets! table new)
  247. ;; Rehash all key/value pairs.
  248. (do ((idx 0 (+ idx 1)))
  249. ((= idx (vector-length old)))
  250. (let lp ((chain (vector-ref old idx)))
  251. (match chain
  252. (() (values))
  253. (((and link (key . _)) . rest)
  254. (let ((new-idx (hash key k)))
  255. (vector-set! new new-idx (cons link (vector-ref new new-idx)))
  256. (lp rest))))))))
  257. (define (hashtable-resize-maybe! table)
  258. (let ((size (hashtable-size table))
  259. (lower (hashtable-lower table))
  260. (upper (hashtable-upper table)))
  261. (when (or (< size lower) (> size upper))
  262. (hashtable-resize! table (optimal-buckets size)))))
  263. (define (hashtable-set! table key val)
  264. "Associate @{val} with @var{key} in @var{table}, potentially
  265. overwriting any previous association with @var{key}."
  266. (let* ((hash (hashtable-hash table))
  267. (equiv? (hashtable-equiv table))
  268. (size (hashtable-size table))
  269. (buckets (hashtable-buckets table))
  270. (idx (hash key (vector-length buckets)))
  271. (chain (vector-ref buckets idx)))
  272. (let lp ((chain* chain))
  273. (match chain*
  274. (()
  275. (vector-set! buckets idx (cons (cons key val) chain))
  276. (set-hashtable-size! table (+ size 1))
  277. (hashtable-resize-maybe! table))
  278. (((and link (other-key . _)) . rest)
  279. (if (equiv? key other-key)
  280. (set-cdr! link val)
  281. (lp rest))))))
  282. (values))
  283. (define (hashtable-delete! table key)
  284. "Remove the association with @var{key} in @var{table}, if one exists."
  285. (let* ((hash (hashtable-hash table))
  286. (equiv? (hashtable-equiv table))
  287. (size (hashtable-size table))
  288. (buckets (hashtable-buckets table))
  289. (idx (hash key (vector-length buckets))))
  290. (vector-set! buckets idx
  291. (let lp ((chain (vector-ref buckets idx)))
  292. (match chain
  293. (() '())
  294. (((and link (other-key . _)) . rest)
  295. (if (equiv? key other-key)
  296. (begin
  297. (set-hashtable-size! table (- size 1))
  298. rest)
  299. (cons link (lp rest)))))))
  300. (hashtable-resize-maybe! table))
  301. (values))
  302. (define* (hashtable-clear! table)
  303. "Remove all items from @var{table}."
  304. (vector-fill! (hashtable-buckets table) '())
  305. (set-hashtable-size! table 0)
  306. (values))
  307. (define (hashtable-contains? table key)
  308. "Return #t if @var{key} has an associated value in @var{table}."
  309. (let ((hash (hashtable-hash table))
  310. (equiv? (hashtable-equiv table))
  311. (buckets (hashtable-buckets table)))
  312. (let lp ((chain (vector-ref buckets (hash key (vector-length buckets)))))
  313. (match chain
  314. (() #f)
  315. (((other-key . _) . rest)
  316. (or (equiv? key other-key) (lp rest)))))))
  317. (define* (hashtable-copy table)
  318. "Return a copy of @var{table}."
  319. (let* ((buckets (hashtable-buckets table))
  320. (k (vector-length buckets))
  321. (buckets* (make-vector k))
  322. (table* (%make-hashtable (hashtable-hash table)
  323. (hashtable-equiv table)
  324. (hashtable-size table)
  325. buckets*
  326. (hashtable-lower table)
  327. (hashtable-upper table))))
  328. (do ((i 0 (+ i 1)))
  329. ((= i k))
  330. (vector-set! buckets* i
  331. (map (lambda (link)
  332. (cons (car link) (cdr link)))
  333. (vector-ref buckets i))))
  334. table*))
  335. (define (hashtable-keys table)
  336. "Return a list of keys in @var{table}."
  337. (hashtable-fold (lambda (key val result)
  338. (cons key result))
  339. '() table))
  340. (define (hashtable-values table)
  341. "Return a list of values in @var{table}."
  342. (hashtable-fold (lambda (key val result)
  343. (cons val result))
  344. '() table))
  345. (define (hashtable-for-each proc table)
  346. "Apply @var{proc} to each key/value association in @var{table}.
  347. Each call is of the form @code{(proc key value)}."
  348. (let ((buckets (hashtable-buckets table)))
  349. (do ((idx 0 (+ idx 1)))
  350. ((= idx (vector-length buckets)))
  351. (let lp ((chain (vector-ref buckets idx)))
  352. (match chain
  353. (() (values))
  354. (((key . val) . rest)
  355. (proc key val)
  356. (lp rest)))))))
  357. (define (hashtable-fold proc init table)
  358. "Accumulate a result by applying @var{proc} with each key/value
  359. association in @var{table} and the result of the previous @var{proc}
  360. call. Each call is of the form @code{(proc key value prev)}. For the
  361. first call, @code{prev} is the initial value @var{init}."
  362. (let ((buckets (hashtable-buckets table)))
  363. (let bucket-lp ((idx 0) (result init))
  364. (if (< idx (vector-length buckets))
  365. (bucket-lp (+ idx 1)
  366. (let chain-lp ((chain (vector-ref buckets idx))
  367. (result result))
  368. (match chain
  369. (() result)
  370. (((key . val) . rest)
  371. (chain-lp rest (proc key val result))))))
  372. result))))
  373. ;; Weak key hashtables
  374. (define (make-weak-key-hashtable)
  375. (%inline-wasm
  376. '(func (result (ref eq))
  377. (struct.new $weak-table
  378. (i32.const 0)
  379. (call $make-weak-map)))))
  380. (define (weak-key-hashtable? obj)
  381. (%inline-wasm
  382. '(func (param $obj (ref eq)) (result (ref eq))
  383. (if (ref eq)
  384. (ref.test $weak-table (local.get $obj))
  385. (then (ref.i31 (i32.const 17)))
  386. (else (ref.i31 (i32.const 1)))))
  387. obj))
  388. (define* (weak-key-hashtable-ref table key #:optional default)
  389. (check-type table weak-key-hashtable? 'weak-key-hashtable-ref)
  390. (%inline-wasm
  391. '(func (param $table (ref eq)) (param $key (ref eq))
  392. (param $default (ref eq)) (result (ref eq))
  393. (call $weak-map-get
  394. (struct.get $weak-table $val
  395. (ref.cast $weak-table (local.get $table)))
  396. (local.get $key)
  397. (local.get $default)))
  398. table key default))
  399. (define (weak-key-hashtable-set! table key value)
  400. (check-type table weak-key-hashtable? 'weak-key-hashtable-set!)
  401. (%inline-wasm
  402. '(func (param $table (ref eq)) (param $key (ref eq)) (param $val (ref eq))
  403. (call $weak-map-set
  404. (struct.get $weak-table $val
  405. (ref.cast $weak-table (local.get $table)))
  406. (local.get $key)
  407. (local.get $val)))
  408. table key value))
  409. (define (weak-key-hashtable-delete! table key)
  410. (check-type table weak-key-hashtable? 'weak-key-hashtable-delete!)
  411. (%inline-wasm
  412. '(func (param $table (ref eq)) (param $key (ref eq))
  413. (call $weak-map-delete
  414. (struct.get $weak-table $val
  415. (ref.cast $weak-table (local.get $table)))
  416. (local.get $key))
  417. (drop))
  418. table key)))