compact-table.scm 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. ; Part of Scheme 48 1.9. See file COPYING for notices and license.
  2. ; Authors: Mike Sperber, Robert Ransom
  3. ; Copyright (c) 2005-2006 by Basis Technology Corporation.
  4. ; A compact table is an encoding of a very large vector that has lots
  5. ; of recurring patterns. It was written for encoding Unicode tables.
  6. ; The vector is partitioned into blocks, and the blocks get assembled
  7. ; into a new compressed vector. Each time a new block gets added, the
  8. ; algorithm looks if the same block is already present in the
  9. ; compressed vector, or the compressed vector ends with a prefix of
  10. ; the new block. In the former case, nothing needs to get added. In
  11. ; the latter case, only the suffix needs to get added. At the same
  12. ; time, the algorithm computes a table with indices of the block
  13. ; beginnings.
  14. ; The algorithm can take a long time; little attempt at optimization
  15. ; has been made. It's mainly intended for offline computation as part
  16. ; of a build process.
  17. ; This tries to merge BLOCK onto REVERSE-BASE, sharing the prefix of
  18. ; BLOCK.
  19. ; returns new reverse list + index offset
  20. (define (compact-block block reverse-base)
  21. (let* ((block-size (length block))
  22. (base-block (reverse (take-upto reverse-base block-size)))
  23. (base-block-size (length base-block)))
  24. (let loop ((base-block base-block)
  25. (offset 0))
  26. (if (list-prefix? base-block block)
  27. (values (append (reverse (list-tail block (- base-block-size offset)))
  28. reverse-base)
  29. offset)
  30. (loop (cdr base-block) (+ 1 offset))))))
  31. ; GET-VALUE is a thunk that returns the next value of the input vector
  32. ; every time it gets called. BLOCK-SIZE is the size of the blocks in
  33. ; the algorithm.
  34. ; The procedure returns two values: the indices vector and a vector of
  35. ; the actual values.
  36. (define (compute-compact-table get-value block-size)
  37. (define (get-block)
  38. (let loop ((i 0) (rev-block '()))
  39. (cond
  40. ((>= i block-size)
  41. (reverse rev-block))
  42. ((get-value)
  43. => (lambda (value)
  44. (loop (+ 1 i) (cons value rev-block))))
  45. (else
  46. (reverse rev-block)))))
  47. (let loop ((reverse-values '())
  48. (reverse-indices '())
  49. (last-index 0)
  50. ;; cache for blocks that have already shown up twice
  51. ;; (reduces run time *a lot*)
  52. (bingo-block-alist '()))
  53. (let ((block (get-block)))
  54. (cond
  55. ((null? block)
  56. (values (list->vector (reverse reverse-indices))
  57. (list->vector (reverse reverse-values))))
  58. ((assoc block bingo-block-alist)
  59. => (lambda (pair)
  60. (loop reverse-values
  61. (cons (cdr pair) reverse-indices)
  62. last-index
  63. bingo-block-alist)))
  64. ((sublist-index (reverse block) reverse-values)
  65. => (lambda (rev-index)
  66. (loop reverse-values
  67. (cons (+ (- block-size (length block)) (- last-index rev-index))
  68. reverse-indices)
  69. last-index
  70. (cons (cons block (- last-index rev-index)) bingo-block-alist))))
  71. (else
  72. (call-with-values
  73. (lambda () (compact-block block reverse-values))
  74. (lambda (reverse-values offset)
  75. (loop reverse-values
  76. (cons (+ last-index offset) reverse-indices)
  77. (+ last-index offset)
  78. bingo-block-alist))))))))
  79. ; List utilities
  80. (define (sublist-index sublist list)
  81. (let loop ((list list)
  82. (index 0))
  83. (cond
  84. ((list-prefix? sublist list)
  85. index)
  86. ((null? list)
  87. #f)
  88. (else (loop (cdr list) (+ 1 index))))))
  89. (define (list-prefix? list-1 list-2)
  90. (cond
  91. ((null? list-1) #t)
  92. ((null? list-2) #f)
  93. ((equal? (car list-1) (car list-2))
  94. (list-prefix? (cdr list-1) (cdr list-2)))
  95. (else #f)))
  96. (define (take-upto list count)
  97. (let loop ((list list) (count count) (rev-result '()))
  98. (if (or (zero? count)
  99. (null? list))
  100. (reverse rev-result)
  101. (loop (cdr list) (- count 1) (cons (car list) rev-result)))))