vlist.scm 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. ;;; -*- mode: scheme; coding: utf-8; -*-
  2. ;;;
  3. ;;; Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
  4. ;;;
  5. ;;; This library is free software; you can redistribute it and/or
  6. ;;; modify it under the terms of the GNU Lesser General Public
  7. ;;; License as published by the Free Software Foundation; either
  8. ;;; version 3 of the License, or (at your option) any later version.
  9. ;;;
  10. ;;; This library is distributed in the hope that it will be useful,
  11. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. ;;; Lesser General Public License for more details.
  14. ;;;
  15. ;;; You should have received a copy of the GNU Lesser General Public
  16. ;;; License along with this library; if not, write to the Free Software
  17. ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. (define-module (ice-9 vlist)
  19. #:use-module (srfi srfi-1)
  20. #:use-module (srfi srfi-9)
  21. #:use-module (srfi srfi-9 gnu)
  22. #:use-module (srfi srfi-26)
  23. #:export (vlist? vlist-cons vlist-head vlist-tail vlist-null?
  24. vlist-null list->vlist vlist-ref vlist-drop vlist-take
  25. vlist-length vlist-fold vlist-fold-right vlist-map
  26. vlist-unfold vlist-unfold-right vlist-append
  27. vlist-reverse vlist-filter vlist-delete vlist->list
  28. vlist-for-each
  29. block-growth-factor
  30. vhash? vhash-cons vhash-consq vhash-consv
  31. vhash-assoc vhash-assq vhash-assv
  32. vhash-delete vhash-delq vhash-delv
  33. vhash-fold vhash-fold-right
  34. vhash-fold* vhash-foldq* vhash-foldv*
  35. alist->vhash))
  36. ;;; Author: Ludovic Courtès <ludo@gnu.org>
  37. ;;;
  38. ;;; Commentary:
  39. ;;;
  40. ;;; This module provides an implementations of vlists, a functional list-like
  41. ;;; data structure described by Phil Bagwell in "Fast Functional Lists,
  42. ;;; Hash-Lists, Dequeues and Variable-Length Arrays", EPFL Technical Report,
  43. ;;; 2002.
  44. ;;;
  45. ;;; The idea is to store vlist elements in increasingly large contiguous blocks
  46. ;;; (implemented as vectors here). These blocks are linked to one another using
  47. ;;; a pointer to the next block (called `block-base' here) and an offset within
  48. ;;; that block (`block-offset' here). The size of these blocks form a geometric
  49. ;;; series with ratio `block-growth-factor'.
  50. ;;;
  51. ;;; In the best case (e.g., using a vlist returned by `list->vlist'),
  52. ;;; elements from the first half of an N-element vlist are accessed in O(1)
  53. ;;; (assuming `block-growth-factor' is 2), and `vlist-length' takes only
  54. ;;; O(ln(N)). Furthermore, the data structure improves data locality since
  55. ;;; vlist elements are adjacent, which plays well with caches.
  56. ;;;
  57. ;;; Code:
  58. ;;;
  59. ;;; VList Blocks and Block Descriptors.
  60. ;;;
  61. (define block-growth-factor
  62. (let ((f (make-fluid)))
  63. (fluid-set! f 2)
  64. f))
  65. (define-syntax define-inline
  66. ;; Work around the lack of an inliner.
  67. (syntax-rules ()
  68. ((_ (name formals ...) body ...)
  69. (define-syntax name
  70. (syntax-rules ()
  71. ((_ formals ...)
  72. (begin body ...)))))))
  73. (define-inline (make-block base offset size hash-tab?)
  74. ;; Return a block (and block descriptor) of SIZE elements pointing to BASE
  75. ;; at OFFSET. If HASH-TAB? is true, a "hash table" is also added.
  76. ;; Note: We use `next-free' instead of `last-used' as suggested by Bagwell.
  77. ;; XXX: We could improve locality here by having a single vector but currently
  78. ;; the extra arithmetic outweighs the benefits (!).
  79. (vector (make-vector size)
  80. base offset size 0
  81. (and hash-tab? (make-vector size #f))))
  82. (define-syntax define-block-accessor
  83. (syntax-rules ()
  84. ((_ name index)
  85. (define-inline (name block)
  86. (vector-ref block index)))))
  87. (define-block-accessor block-content 0)
  88. (define-block-accessor block-base 1)
  89. (define-block-accessor block-offset 2)
  90. (define-block-accessor block-size 3)
  91. (define-block-accessor block-next-free 4)
  92. (define-block-accessor block-hash-table 5)
  93. (define-inline (increment-block-next-free! block)
  94. (vector-set! block 4
  95. (+ (block-next-free block) 1)))
  96. (define-inline (block-append! block value)
  97. ;; This is not thread-safe. To fix it, see Section 2.8 of the paper.
  98. (let ((offset (block-next-free block)))
  99. (increment-block-next-free! block)
  100. (vector-set! (block-content block) offset value)
  101. #t))
  102. (define-inline (block-ref block offset)
  103. (vector-ref (block-content block) offset))
  104. (define-inline (block-ref* block offset)
  105. (let ((v (block-ref block offset)))
  106. (if (block-hash-table block)
  107. (car v) ;; hide the vhash link
  108. v)))
  109. (define-inline (block-hash-table-ref block offset)
  110. (vector-ref (block-hash-table block) offset))
  111. (define-inline (block-hash-table-set! block offset value)
  112. (vector-set! (block-hash-table block) offset value))
  113. (define block-null
  114. ;; The null block.
  115. (make-block #f 0 0 #f))
  116. ;;;
  117. ;;; VLists.
  118. ;;;
  119. (define-record-type <vlist>
  120. ;; A vlist is just a base+offset pair pointing to a block.
  121. ;; XXX: Allocating a <vlist> record in addition to the block at each
  122. ;; `vlist-cons' call is inefficient. However, Bagwell's hack to avoid it
  123. ;; (Section 2.2) would require GC_ALL_INTERIOR_POINTERS, which would be a
  124. ;; performance hit for everyone.
  125. (make-vlist base offset)
  126. vlist?
  127. (base vlist-base)
  128. (offset vlist-offset))
  129. (set-record-type-printer! <vlist>
  130. (lambda (vl port)
  131. (cond ((vlist-null? vl)
  132. (format port "#<vlist ()>"))
  133. ((block-hash-table (vlist-base vl))
  134. (format port "#<vhash ~x ~a pairs>"
  135. (object-address vl)
  136. (vhash-fold (lambda (k v r)
  137. (+ 1 r))
  138. 0
  139. vl)))
  140. (else
  141. (format port "#<vlist ~a>"
  142. (vlist->list vl))))))
  143. (define vlist-null
  144. ;; The empty vlist.
  145. (make-vlist block-null 0))
  146. (define-inline (block-cons item vlist hash-tab?)
  147. (let loop ((base (vlist-base vlist))
  148. (offset (+ 1 (vlist-offset vlist))))
  149. (if (and (< offset (block-size base))
  150. (= offset (block-next-free base))
  151. (block-append! base item))
  152. (make-vlist base offset)
  153. (let ((size (cond ((eq? base block-null) 1)
  154. ((< offset (block-size base))
  155. ;; new vlist head
  156. 1)
  157. (else
  158. (* (fluid-ref block-growth-factor)
  159. (block-size base))))))
  160. ;; Prepend a new block pointing to BASE.
  161. (loop (make-block base (- offset 1) size hash-tab?)
  162. 0)))))
  163. (define (vlist-cons item vlist)
  164. "Return a new vlist with @var{item} as its head and @var{vlist} as its
  165. tail."
  166. ;; Note: Calling `vlist-cons' on a vhash will not do the right thing: it
  167. ;; doesn't box ITEM so that it can have the hidden "next" link used by
  168. ;; vhash items, and it passes `#f' as the HASH-TAB? argument to
  169. ;; `block-cons'. However, inserting all the checks here has an important
  170. ;; performance penalty, hence this choice.
  171. (block-cons item vlist #f))
  172. (define (vlist-head vlist)
  173. "Return the head of @var{vlist}."
  174. (let ((base (vlist-base vlist))
  175. (offset (vlist-offset vlist)))
  176. (block-ref* base offset)))
  177. (define (vlist-tail vlist)
  178. "Return the tail of @var{vlist}."
  179. (let ((base (vlist-base vlist))
  180. (offset (vlist-offset vlist)))
  181. (if (> offset 0)
  182. (make-vlist base (- offset 1))
  183. (make-vlist (block-base base)
  184. (block-offset base)))))
  185. (define (vlist-null? vlist)
  186. "Return true if @var{vlist} is empty."
  187. (let ((base (vlist-base vlist)))
  188. (and (not (block-base base))
  189. (= 0 (block-size base)))))
  190. ;;;
  191. ;;; VList Utilities.
  192. ;;;
  193. (define (list->vlist lst)
  194. "Return a new vlist whose contents correspond to @var{lst}."
  195. (vlist-reverse (fold vlist-cons vlist-null lst)))
  196. (define (vlist-fold proc init vlist)
  197. "Fold over @var{vlist}, calling @var{proc} for each element."
  198. ;; FIXME: Handle multiple lists.
  199. (let loop ((base (vlist-base vlist))
  200. (offset (vlist-offset vlist))
  201. (result init))
  202. (if (eq? base block-null)
  203. result
  204. (let* ((next (- offset 1))
  205. (done? (< next 0)))
  206. (loop (if done? (block-base base) base)
  207. (if done? (block-offset base) next)
  208. (proc (block-ref* base offset) result))))))
  209. (define (vlist-fold-right proc init vlist)
  210. "Fold over @var{vlist}, calling @var{proc} for each element, starting from
  211. the last element."
  212. (define len (vlist-length vlist))
  213. (let loop ((index (1- len))
  214. (result init))
  215. (if (< index 0)
  216. result
  217. (loop (1- index)
  218. (proc (vlist-ref vlist index) result)))))
  219. (define (vlist-reverse vlist)
  220. "Return a new @var{vlist} whose content are those of @var{vlist} in reverse
  221. order."
  222. (vlist-fold vlist-cons vlist-null vlist))
  223. (define (vlist-map proc vlist)
  224. "Map @var{proc} over the elements of @var{vlist} and return a new vlist."
  225. (vlist-fold (lambda (item result)
  226. (vlist-cons (proc item) result))
  227. vlist-null
  228. (vlist-reverse vlist)))
  229. (define (vlist->list vlist)
  230. "Return a new list whose contents match those of @var{vlist}."
  231. (vlist-fold-right cons '() vlist))
  232. (define (vlist-ref vlist index)
  233. "Return the element at index @var{index} in @var{vlist}."
  234. (let loop ((index index)
  235. (base (vlist-base vlist))
  236. (offset (vlist-offset vlist)))
  237. (if (<= index offset)
  238. (block-ref* base (- offset index))
  239. (loop (- index offset 1)
  240. (block-base base)
  241. (block-offset base)))))
  242. (define (vlist-drop vlist count)
  243. "Return a new vlist that does not contain the @var{count} first elements of
  244. @var{vlist}."
  245. (let loop ((count count)
  246. (base (vlist-base vlist))
  247. (offset (vlist-offset vlist)))
  248. (if (<= count offset)
  249. (make-vlist base (- offset count))
  250. (loop (- count offset 1)
  251. (block-base base)
  252. (block-offset base)))))
  253. (define (vlist-take vlist count)
  254. "Return a new vlist that contains only the @var{count} first elements of
  255. @var{vlist}."
  256. (let loop ((count count)
  257. (vlist vlist)
  258. (result vlist-null))
  259. (if (= 0 count)
  260. (vlist-reverse result)
  261. (loop (- count 1)
  262. (vlist-tail vlist)
  263. (vlist-cons (vlist-head vlist) result)))))
  264. (define (vlist-filter pred vlist)
  265. "Return a new vlist containing all the elements from @var{vlist} that
  266. satisfy @var{pred}."
  267. (vlist-fold-right (lambda (e v)
  268. (if (pred e)
  269. (vlist-cons e v)
  270. v))
  271. vlist-null
  272. vlist))
  273. (define* (vlist-delete x vlist #:optional (equal? equal?))
  274. "Return a new vlist corresponding to @var{vlist} without the elements
  275. @var{equal?} to @var{x}."
  276. (vlist-filter (lambda (e)
  277. (not (equal? e x)))
  278. vlist))
  279. (define (vlist-length vlist)
  280. "Return the length of @var{vlist}."
  281. (let loop ((base (vlist-base vlist))
  282. (len (vlist-offset vlist)))
  283. (if (eq? base block-null)
  284. len
  285. (loop (block-base base)
  286. (+ len 1 (block-offset base))))))
  287. (define* (vlist-unfold p f g seed
  288. #:optional (tail-gen (lambda (x) vlist-null)))
  289. "Return a new vlist. See the description of SRFI-1 `unfold' for details."
  290. (let uf ((seed seed))
  291. (if (p seed)
  292. (tail-gen seed)
  293. (vlist-cons (f seed)
  294. (uf (g seed))))))
  295. (define* (vlist-unfold-right p f g seed #:optional (tail vlist-null))
  296. "Return a new vlist. See the description of SRFI-1 `unfold-right' for
  297. details."
  298. (let uf ((seed seed) (lis tail))
  299. (if (p seed)
  300. lis
  301. (uf (g seed) (vlist-cons (f seed) lis)))))
  302. (define (vlist-append . vlists)
  303. "Append the given lists."
  304. (if (null? vlists)
  305. vlist-null
  306. (fold-right (lambda (vlist result)
  307. (vlist-fold-right (lambda (e v)
  308. (vlist-cons e v))
  309. result
  310. vlist))
  311. vlist-null
  312. vlists)))
  313. (define (vlist-for-each proc vlist)
  314. "Call @var{proc} on each element of @var{vlist}. The result is unspecified."
  315. (vlist-fold (lambda (item x)
  316. (proc item))
  317. (if #f #f)
  318. vlist))
  319. ;;;
  320. ;;; Hash Lists, aka. `VHash'.
  321. ;;;
  322. ;; Assume keys K1 and K2, H = hash(K1) = hash(K2), and two values V1 and V2
  323. ;; associated with K1 and K2, respectively. The resulting layout is a
  324. ;; follows:
  325. ;;
  326. ;; ,--------------------.
  327. ;; | ,-> (K1 . V1) ---. |
  328. ;; | | | |
  329. ;; | | (K2 . V2) <--' |
  330. ;; | | |
  331. ;; +-|------------------+
  332. ;; | | |
  333. ;; | | |
  334. ;; | `-- O <---------------H
  335. ;; | |
  336. ;; `--------------------'
  337. ;;
  338. ;; The bottom part is the "hash table" part of the vhash, as returned by
  339. ;; `block-hash-table'; the other half is the data part. O is the offset of
  340. ;; the first value associated with a key that hashes to H in the data part.
  341. ;; The (K1 . V1) pair has a "hidden" link to the (K2 . V2) pair; hiding the
  342. ;; link is handled by `block-ref'.
  343. ;; This API potentially requires users to repeat which hash function and which
  344. ;; equality predicate to use. This can lead to unpredictable results if they
  345. ;; are used in consistenly, e.g., between `vhash-cons' and `vhash-assoc', which
  346. ;; is undesirable, as argued in http://savannah.gnu.org/bugs/?22159 . OTOH, two
  347. ;; arguments can be made in favor of this API:
  348. ;;
  349. ;; - It's consistent with how alists are handled in SRFI-1.
  350. ;;
  351. ;; - In practice, users will probably consistenly use either the `q', the `v',
  352. ;; or the plain variant (`vlist-cons' and `vlist-assoc' without any optional
  353. ;; argument), i.e., they will rarely explicitly pass a hash function or
  354. ;; equality predicate.
  355. (define (vhash? obj)
  356. "Return true if @var{obj} is a hash list."
  357. (and (vlist? obj)
  358. (let ((base (vlist-base obj)))
  359. (and base
  360. (vector? (block-hash-table base))))))
  361. (define* (vhash-cons key value vhash #:optional (hash hash))
  362. "Return a new hash list based on @var{vhash} where @var{key} is associated
  363. with @var{value}. Use @var{hash} to compute @var{key}'s hash."
  364. (let* ((key+value (cons key value))
  365. (entry (cons key+value #f))
  366. (vlist (block-cons entry vhash #t))
  367. (base (vlist-base vlist))
  368. (khash (hash key (block-size base))))
  369. (let ((o (block-hash-table-ref base khash)))
  370. (if o (set-cdr! entry o)))
  371. (block-hash-table-set! base khash
  372. (vlist-offset vlist))
  373. vlist))
  374. (define vhash-consq (cut vhash-cons <> <> <> hashq))
  375. (define vhash-consv (cut vhash-cons <> <> <> hashv))
  376. (define-inline (%vhash-fold* proc init key vhash equal? hash)
  377. ;; Fold over all the values associated with KEY in VHASH.
  378. (define khash
  379. (let ((size (block-size (vlist-base vhash))))
  380. (and (> size 0) (hash key size))))
  381. (let loop ((base (vlist-base vhash))
  382. (khash khash)
  383. (offset (and khash
  384. (block-hash-table-ref (vlist-base vhash)
  385. khash)))
  386. (max-offset (vlist-offset vhash))
  387. (result init))
  388. (let ((answer (and offset (block-ref base offset))))
  389. (cond ((and (pair? answer)
  390. (<= offset max-offset)
  391. (let ((answer-key (caar answer)))
  392. (equal? key answer-key)))
  393. (let ((result (proc (cdar answer) result))
  394. (next-offset (cdr answer)))
  395. (loop base khash next-offset max-offset result)))
  396. ((and (pair? answer) (cdr answer))
  397. =>
  398. (lambda (next-offset)
  399. (loop base khash next-offset max-offset result)))
  400. (else
  401. (let ((next-base (block-base base)))
  402. (if (and next-base (> (block-size next-base) 0))
  403. (let* ((khash (hash key (block-size next-base)))
  404. (offset (block-hash-table-ref next-base khash)))
  405. (loop next-base khash offset (block-offset base)
  406. result))
  407. result)))))))
  408. (define* (vhash-fold* proc init key vhash
  409. #:optional (equal? equal?) (hash hash))
  410. "Fold over all the values associated with @var{key} in @var{vhash}, with each
  411. call to @var{proc} having the form @code{(proc value result)}, where
  412. @var{result} is the result of the previous call to @var{proc} and @var{init} the
  413. value of @var{result} for the first call to @var{proc}."
  414. (%vhash-fold* proc init key vhash equal? hash))
  415. (define (vhash-foldq* proc init key vhash)
  416. "Same as @code{vhash-fold*}, but using @code{hashq} and @code{eq?}."
  417. (%vhash-fold* proc init key vhash eq? hashq))
  418. (define (vhash-foldv* proc init key vhash)
  419. "Same as @code{vhash-fold*}, but using @code{hashv} and @code{eqv?}."
  420. (%vhash-fold* proc init key vhash eqv? hashv))
  421. (define-inline (%vhash-assoc key vhash equal? hash)
  422. ;; A specialization of `vhash-fold*' that stops when the first value
  423. ;; associated with KEY is found or when the end-of-list is reached. Inline to
  424. ;; make sure `vhash-assq' gets to use the `eq?' instruction instead of calling
  425. ;; the `eq?' subr.
  426. (define khash
  427. (let ((size (block-size (vlist-base vhash))))
  428. (and (> size 0) (hash key size))))
  429. (let loop ((base (vlist-base vhash))
  430. (khash khash)
  431. (offset (and khash
  432. (block-hash-table-ref (vlist-base vhash)
  433. khash)))
  434. (max-offset (vlist-offset vhash)))
  435. (let ((answer (and offset (block-ref base offset))))
  436. (cond ((and (pair? answer)
  437. (<= offset max-offset)
  438. (let ((answer-key (caar answer)))
  439. (equal? key answer-key)))
  440. (car answer))
  441. ((and (pair? answer) (cdr answer))
  442. =>
  443. (lambda (next-offset)
  444. (loop base khash next-offset max-offset)))
  445. (else
  446. (let ((next-base (block-base base)))
  447. (and next-base
  448. (> (block-size next-base) 0)
  449. (let* ((khash (hash key (block-size next-base)))
  450. (offset (block-hash-table-ref next-base khash)))
  451. (loop next-base khash offset
  452. (block-offset base))))))))))
  453. (define* (vhash-assoc key vhash #:optional (equal? equal?) (hash hash))
  454. "Return the first key/value pair from @var{vhash} whose key is equal to
  455. @var{key} according to the @var{equal?} equality predicate."
  456. (%vhash-assoc key vhash equal? hash))
  457. (define (vhash-assq key vhash)
  458. "Return the first key/value pair from @var{vhash} whose key is @code{eq?} to
  459. @var{key}."
  460. (%vhash-assoc key vhash eq? hashq))
  461. (define (vhash-assv key vhash)
  462. "Return the first key/value pair from @var{vhash} whose key is @code{eqv?} to
  463. @var{key}."
  464. (%vhash-assoc key vhash eqv? hashv))
  465. (define* (vhash-delete key vhash #:optional (equal? equal?) (hash hash))
  466. "Remove all associations from @var{vhash} with @var{key}, comparing keys
  467. with @var{equal?}."
  468. (if (vhash-assoc key vhash equal? hash)
  469. (vlist-fold (lambda (k+v result)
  470. (let ((k (car k+v))
  471. (v (cdr k+v)))
  472. (if (equal? k key)
  473. result
  474. (vhash-cons k v result hash))))
  475. vlist-null
  476. vhash)
  477. vhash))
  478. (define vhash-delq (cut vhash-delete <> <> eq? hashq))
  479. (define vhash-delv (cut vhash-delete <> <> eqv? hashv))
  480. (define (vhash-fold proc seed vhash)
  481. "Fold over the key/pair elements of @var{vhash}. For each pair call
  482. @var{proc} as @code{(@var{proc} key value result)}."
  483. (vlist-fold (lambda (key+value result)
  484. (proc (car key+value) (cdr key+value)
  485. result))
  486. seed
  487. vhash))
  488. (define (vhash-fold-right proc seed vhash)
  489. "Fold over the key/pair elements of @var{vhash}, starting from the 0th
  490. element. For each pair call @var{proc} as @code{(@var{proc} key value
  491. result)}."
  492. (vlist-fold-right (lambda (key+value result)
  493. (proc (car key+value) (cdr key+value)
  494. result))
  495. seed
  496. vhash))
  497. (define* (alist->vhash alist #:optional (hash hash))
  498. "Return the vhash corresponding to @var{alist}, an association list."
  499. (fold-right (lambda (pair result)
  500. (vhash-cons (car pair) (cdr pair) result hash))
  501. vlist-null
  502. alist))
  503. ;;; vlist.scm ends here