xz.scm 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. ;;; Disarchive
  2. ;;; Copyright © 2021 Timothy Sample <samplet@ngyro.com>
  3. ;;;
  4. ;;; This file is part of Disarchive.
  5. ;;;
  6. ;;; Disarchive is free software: you can redistribute it and/or modify
  7. ;;; it under the terms of the GNU General Public License as published by
  8. ;;; the Free Software Foundation, either version 3 of the License, or
  9. ;;; (at your option) any later version.
  10. ;;;
  11. ;;; Disarchive is distributed in the hope that it will be useful,
  12. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. ;;; GNU General Public License for more details.
  15. ;;;
  16. ;;; You should have received a copy of the GNU General Public License
  17. ;;; along with Disarchive. If not, see <http://www.gnu.org/licenses/>.
  18. (define-module (disarchive kinds xz)
  19. #:use-module (disarchive utils)
  20. #:use-module (ice-9 binary-ports)
  21. #:use-module (ice-9 match)
  22. #:use-module (rnrs bytevectors)
  23. #:use-module (srfi srfi-1)
  24. #:use-module (srfi srfi-9 gnu)
  25. #:use-module (srfi srfi-43)
  26. #:export ((bytevector-crc32 . bytevector-xz-crc32)
  27. xz-integer-length
  28. encode-xz-integer
  29. decode-xz-integer
  30. make-xz-stream-header
  31. xz-stream-header?
  32. xz-stream-header-check-type
  33. xz-stream-header-reserved
  34. xz-stream-header-crc32
  35. decode-xz-stream-header
  36. encode-xz-stream-header
  37. make-xz-stream-footer
  38. xz-stream-footer?
  39. xz-stream-footer-check-type
  40. xz-stream-footer-reserved
  41. xz-stream-footer-backward-size
  42. xz-stream-footer-crc32
  43. decode-xz-stream-footer
  44. encode-xz-stream-footer
  45. make-xz-filter-flags
  46. xz-filter-flags?
  47. xz-filter-flags-id
  48. xz-filter-flags-properties
  49. decode-xz-filter-flags
  50. encode-xz-filter-flags
  51. make-xz-block-header
  52. xz-block-header?
  53. xz-block-header-reserved
  54. xz-block-header-compressed-size
  55. xz-block-header-uncompressed-size
  56. xz-block-header-filters
  57. xz-block-header-padding
  58. set-xz-block-header-padding
  59. xz-block-header-crc32
  60. xz-block-header-size
  61. decode-xz-block-header
  62. encode-xz-block-header
  63. make-xz-index-record
  64. xz-index-record?
  65. xz-index-record-unpadded-size
  66. xz-index-record-uncompressed-size
  67. xz-index-record-block-size
  68. decode-xz-index-record
  69. encode-xz-index-record
  70. make-xz-index
  71. xz-index?
  72. xz-index-records
  73. xz-index-crc32
  74. xz-index-size
  75. decode-xz-index
  76. encode-xz-index
  77. make-xz-stream
  78. xz-stream?
  79. xz-stream-header
  80. xz-stream-blocks
  81. xz-stream-index
  82. xz-stream-footer
  83. xz-stream-size
  84. xz-stream-uncompressed-size
  85. xz-stream-fold-right
  86. read-xz-streams))
  87. (define magic-header-bytes #vu8(#xfd #x37 #x7a #x58 #x5a #x00))
  88. (define magic-footer-bytes #vu8(#x59 #x5a))
  89. (define* (magic-bytes? ref bv #:optional (start 0))
  90. (let loop ((k 0) (j start))
  91. (if (>= k (bytevector-length ref))
  92. #t
  93. (and (= (bytevector-u8-ref ref k)
  94. (bytevector-u8-ref bv j))
  95. (loop (1+ k) (1+ j))))))
  96. (define bytevector-crc32
  97. (let ((table (vector-unfold (lambda (k)
  98. (fold (lambda (_ x)
  99. (if (odd? x)
  100. (logxor (ash x -1) #xedb88320)
  101. (ash x -1)))
  102. k
  103. (iota 8)))
  104. 256)))
  105. (lambda* (bv #:optional (start 0) (end (bytevector-length bv)))
  106. "Calculate the 32-bit CRC (Cyclic Redundancy Check) of BV. The
  107. optional parameters START and END may be set to calculate the check
  108. over a specific part of BV (rather than the whole thing)."
  109. (define inverted
  110. (fold (lambda (k crc)
  111. (let* ((byte (bytevector-u8-ref bv k))
  112. (index (logxor byte (bit-extract crc 0 8))))
  113. (logxor (vector-ref table index) (ash crc -8))))
  114. #xffffffff
  115. (iota (- end start) start)))
  116. (bit-extract (lognot inverted) 0 32))))
  117. (define* (bytevector-add-crc32! bv #:optional crc32
  118. (target (- (bytevector-length bv) 4))
  119. (start 0)
  120. (end (- (bytevector-length bv) 4)))
  121. "Write a 32-bit CRC (Cyclic Redundancy Check) to the last four bytes
  122. of BV. If CRC32 is set, write that value. Otherwise, compute the
  123. 32-bit CRC over all but the last 4 bytes of BV."
  124. (let* ((x (or crc32 (bytevector-crc32 bv start end))))
  125. (bytevector-u32-set! bv target x 'little)))
  126. (define* (find-xz-integer-end bv #:optional (start 0)
  127. (end (bytevector-length bv)))
  128. (let loop ((k start))
  129. (cond
  130. ((>= k end) #f)
  131. ((< (bytevector-u8-ref bv k) 128) (1+ k))
  132. (else (loop (1+ k))))))
  133. (define* (decode-xz-integer bv #:optional (start 0)
  134. (end (bytevector-length bv)))
  135. (let loop ((k start) (shift 0) (acc 0))
  136. (when (>= k end)
  137. (error "Invalid multibyte integer."))
  138. (let ((b (bytevector-u8-ref bv k)))
  139. (if (< b 128)
  140. (begin
  141. (when (or (and (> k start) (zero? b))
  142. (not (= (1+ k) end)))
  143. (error "Invalid multibyte integer."))
  144. (logior (ash b shift) acc))
  145. (loop (1+ k) (+ shift 7)
  146. (logior (ash (bit-extract b 0 7) shift) acc))))))
  147. (define (xz-integer-length n)
  148. (1+ (quotient (1- (integer-length n)) 7)))
  149. (define (xz-integer->bytevector n)
  150. (define bv (make-bytevector (xz-integer-length n)))
  151. (let loop ((n n) (k 0))
  152. (cond
  153. ((< n 128) (bytevector-u8-set! bv k n) bv)
  154. (else (let ((byte (logior #x80 (bit-extract n 0 7))))
  155. (bytevector-u8-set! bv k byte)
  156. (loop (ash n -7) (1+ k)))))))
  157. (define encode-xz-integer
  158. (make-thing-encoder xz-integer->bytevector))
  159. (define-immutable-record-type <xz-stream-header>
  160. (make-xz-stream-header check-type reserved crc32)
  161. xz-stream-header?
  162. ;; A number from 0 to 15.
  163. (check-type xz-stream-header-check-type)
  164. ;; The "reserved" part of the stream flags. This is a list
  165. ;; consisting of the reserved byte before the check type and the
  166. ;; reserved nibble after it.
  167. (reserved xz-stream-header-reserved)
  168. ;; A four-byte number or #f.
  169. (crc32 xz-stream-header-crc32))
  170. (define* (decode-xz-stream-header bv #:optional (start 0)
  171. (end (bytevector-length bv)))
  172. "Decode the contents of the bytevector BV as an XZ stream header.
  173. Optionally, START and END indexes can be provided to decode only a
  174. part of BV."
  175. (unless (= (- end start) 12)
  176. (error "Invalid XZ stream header size."))
  177. (unless (magic-bytes? magic-header-bytes bv start)
  178. (error "Invalid XZ magic bytes."))
  179. (let* ((flags (bytevector-u8-ref bv (+ start 7)))
  180. (check-type (bit-extract flags 0 4))
  181. (reserved (list (bytevector-u8-ref bv (+ start 6))
  182. (bit-extract flags 4 8)))
  183. (crc32* (bytevector-u32-ref bv (+ start 8) 'little))
  184. (crc32 (if (= (bytevector-crc32 bv (+ start 6) (- end 4)) crc32*)
  185. #f
  186. crc32*)))
  187. (make-xz-stream-header check-type reserved crc32)))
  188. (define (xz-stream-header->bytevector strm-head)
  189. (define bv (make-bytevector 12))
  190. (match-let* ((($ <xz-stream-header> check-type reserved crc32) strm-head)
  191. ((reserved-byte reserved-nibble) reserved)
  192. (byte7 (logior (ash reserved-nibble 4) check-type)))
  193. (bytevector-copy! magic-header-bytes 0 bv 0 6)
  194. (bytevector-u8-set! bv 6 reserved-byte)
  195. (bytevector-u8-set! bv 7 byte7)
  196. (bytevector-add-crc32! bv crc32 8 6 8)
  197. bv))
  198. (define encode-xz-stream-header
  199. (make-thing-encoder xz-stream-header->bytevector))
  200. (define-immutable-record-type <xz-stream-footer>
  201. (make-xz-stream-footer check-type reserved backward-size crc32)
  202. xz-stream-footer?
  203. ;; A number from 0 to 15.
  204. (check-type xz-stream-footer-check-type)
  205. ;; The "reserved" part of the stream flags. This is a list
  206. ;; consisting of the reserved byte before the check type and the
  207. ;; reserved nibble after it.
  208. (reserved xz-stream-footer-reserved)
  209. ;; A four-byte number.
  210. (backward-size xz-stream-footer-backward-size)
  211. ;; A four-byte number.
  212. (crc32 xz-stream-footer-crc32))
  213. (define* (decode-xz-stream-footer bv #:optional (start 0)
  214. (end (bytevector-length bv)))
  215. (unless (= (- end start) 12)
  216. (error "Invalid XZ stream footer size."))
  217. (unless (magic-bytes? magic-footer-bytes bv (+ start 10))
  218. (error "Invalid XZ stream footer magic bytes."))
  219. (let* ((crc32* (bytevector-u32-ref bv start 'little))
  220. (crc32 (if (= (bytevector-crc32 bv (+ start 4) (- end 2)) crc32*)
  221. #f
  222. crc32*))
  223. (raw-backward-size (bytevector-u32-ref bv (+ start 4) 'little))
  224. (backward-size (* (1+ raw-backward-size) 4))
  225. (flags (bytevector-u8-ref bv (+ start 9)))
  226. (check-type (bit-extract flags 0 4))
  227. (reserved (list (bytevector-u8-ref bv (+ start 8))
  228. (bit-extract flags 4 8))))
  229. (make-xz-stream-footer check-type reserved backward-size crc32)))
  230. (define (xz-stream-footer->bytevector foot)
  231. (define bv (make-bytevector 12))
  232. (match-let* ((($ <xz-stream-footer> check-type reserved
  233. backward-size crc32) foot)
  234. ((reserved-byte reserved-nibble) reserved)
  235. (byte9 (logior (ash reserved-nibble 4) check-type)))
  236. (bytevector-copy! magic-footer-bytes 0 bv 10 2)
  237. (bytevector-u8-set! bv 8 reserved-byte)
  238. (bytevector-u8-set! bv 9 byte9)
  239. (bytevector-u32-set! bv 4 (1- (quotient backward-size 4)) 'little)
  240. (bytevector-add-crc32! bv crc32 0 4 10)
  241. bv))
  242. (define encode-xz-stream-footer
  243. (make-thing-encoder xz-stream-footer->bytevector))
  244. (define-immutable-record-type <xz-filter-flags>
  245. (make-xz-filter-flags id properties)
  246. xz-filter-flags?
  247. ;; An (XZ) integer.
  248. (id xz-filter-flags-id)
  249. ;; A bytevector.
  250. (properties xz-filter-flags-properties))
  251. (define (xz-filter-flags-size flags)
  252. (let ((id (xz-filter-flags-id flags))
  253. (properties (xz-filter-flags-properties flags)))
  254. (+ (xz-integer-length id)
  255. (xz-integer-length (bytevector-length properties))
  256. (bytevector-length properties))))
  257. (define* (read-xz-filter-flags bv #:optional (start 0)
  258. (end (bytevector-length bv)))
  259. (let* ((id-end (find-xz-integer-end bv start end))
  260. (id (decode-xz-integer bv start id-end))
  261. (ps-end (find-xz-integer-end bv id-end end))
  262. (properties-size (decode-xz-integer bv id-end ps-end)))
  263. (unless (>= end (+ ps-end properties-size))
  264. (error "Invalid XZ filter flags."))
  265. (make-xz-filter-flags id (sub-bytevector bv ps-end
  266. (+ ps-end properties-size)))))
  267. (define* (decode-xz-filter-flags bv #:optional (start 0)
  268. (end (bytevector-length bv)))
  269. (let ((filter (read-xz-filter-flags bv start end)))
  270. (unless (= (- end start) (xz-filter-flags-size filter))
  271. (error "Invalid XZ filter flags."))
  272. filter))
  273. (define (xz-filter-flags->bytevector flags)
  274. (let* ((size (xz-filter-flags-size flags))
  275. (bv (make-bytevector size))
  276. (id-bv (xz-integer->bytevector (xz-filter-flags-id flags)))
  277. (props (xz-filter-flags-properties flags))
  278. (len-bv (xz-integer->bytevector (bytevector-length props))))
  279. (bytevector-copy! id-bv 0 bv 0 (bytevector-length id-bv))
  280. (bytevector-copy! len-bv 0 bv
  281. (bytevector-length id-bv)
  282. (bytevector-length len-bv))
  283. (bytevector-copy! props 0 bv
  284. (+ (bytevector-length id-bv)
  285. (bytevector-length len-bv))
  286. (bytevector-length props))
  287. bv))
  288. (define encode-xz-filter-flags
  289. (make-thing-encoder xz-filter-flags->bytevector))
  290. (define-immutable-record-type <xz-block-header>
  291. (make-xz-block-header reserved compressed-size uncompressed-size
  292. filters padding crc32)
  293. xz-block-header?
  294. ;; A reserved nibble. It should always be zero.
  295. (reserved xz-block-header-reserved)
  296. ;; An (XZ) integer.
  297. (compressed-size xz-block-header-compressed-size)
  298. ;; An (XZ) integer.
  299. (uncompressed-size xz-block-header-uncompressed-size)
  300. ;; A list of <xz-filter-flags>.
  301. (filters xz-block-header-filters)
  302. ;; The number of padding bytes.
  303. (padding xz-block-header-padding set-xz-block-header-padding)
  304. ;; A four-byte number or #f.
  305. (crc32 xz-block-header-crc32))
  306. (define (xz-block-header-size bh)
  307. (let* ((c-size (xz-block-header-compressed-size bh))
  308. (u-size (xz-block-header-uncompressed-size bh))
  309. (padding (xz-block-header-padding bh))
  310. (filters (xz-block-header-filters bh)))
  311. (apply + 1 1 4 ; size, flags, and crc32
  312. padding
  313. (if c-size (xz-integer-length c-size) 0)
  314. (if u-size (xz-integer-length u-size) 0)
  315. (map xz-filter-flags-size filters))))
  316. (define* (decode-xz-block-header bv #:optional (start 0)
  317. (end (bytevector-length bv)))
  318. (when (< (- end start) 6)
  319. (error "Invalid XZ block header size."))
  320. (let* ((raw-size (bytevector-u8-ref bv start))
  321. (size (* (1+ raw-size) 4)))
  322. (unless (= (- end start) size)
  323. (error "Invalid XZ block header size."))
  324. (let* ((flags (bytevector-u8-ref bv (1+ start)))
  325. (filter-count (1+ (bit-extract flags 0 2)))
  326. (reserved (bit-extract flags 2 6))
  327. (compressed-size? (not (zero? (bit-extract flags 6 7))))
  328. (uncompressed-size? (not (zero? (bit-extract flags 7 8))))
  329. (cs-end (if compressed-size?
  330. (find-xz-integer-end bv (+ start 2) end)
  331. (+ start 2)))
  332. (compressed-size (and compressed-size?
  333. (decode-xz-integer bv (+ start 2) cs-end)))
  334. (us-end (if uncompressed-size?
  335. (find-xz-integer-end bv cs-end end)
  336. cs-end))
  337. (uncompressed-size (and uncompressed-size?
  338. (decode-xz-integer bv cs-end us-end)))
  339. (filters (let loop ((k us-end) (j 0) (acc '()))
  340. (if (>= j filter-count)
  341. (reverse acc)
  342. (let ((flags (read-xz-filter-flags bv k end)))
  343. (loop (+ k (xz-filter-flags-size flags))
  344. (1+ j)
  345. (cons flags acc))))))
  346. (f-end (+ us-end (reduce + 0 (map xz-filter-flags-size filters))))
  347. (padding (- size 4 (- f-end start)))
  348. (p-end (if (or (< padding 0))
  349. (error "Invalid block header padding.")
  350. (+ f-end padding)))
  351. (crc32* (bytevector-u32-ref bv p-end 'little))
  352. (crc32 (if (= (bytevector-crc32 bv start (- end 4)) crc32*)
  353. #f
  354. crc32*)))
  355. (unless (bytevector-zero? bv f-end p-end)
  356. (error "Invalid block header padding."))
  357. (make-xz-block-header reserved compressed-size uncompressed-size
  358. filters padding crc32))))
  359. (define (xz-block-header->bytevector bh)
  360. (let* ((reserved (xz-block-header-reserved bh))
  361. (c-size (xz-block-header-compressed-size bh))
  362. (u-size (xz-block-header-uncompressed-size bh))
  363. (filters (xz-block-header-filters bh))
  364. (crc32 (xz-block-header-crc32 bh))
  365. (size (xz-block-header-size bh))
  366. (bv (make-bytevector size 0))
  367. (raw-size (1- (quotient size 4)))
  368. (flags (logior (1- (length filters))
  369. (ash reserved 2)
  370. (ash (if c-size 1 0) 6)
  371. (ash (if u-size 1 0) 7)))
  372. (c-size-start 2)
  373. (u-size-start (+ c-size-start
  374. (if c-size (xz-integer-length c-size) 0)))
  375. (filters-start (+ u-size-start
  376. (if u-size (xz-integer-length u-size) 0))))
  377. (bytevector-u8-set! bv 0 raw-size)
  378. (bytevector-u8-set! bv 1 flags)
  379. (when c-size
  380. (encode-xz-integer c-size bv c-size-start))
  381. (when u-size
  382. (encode-xz-integer u-size bv u-size-start))
  383. (let loop ((filters filters) (k filters-start))
  384. (match filters
  385. (() #t)
  386. ((filter . rest)
  387. (encode-xz-filter-flags filter bv k)
  388. (loop rest (+ k (xz-filter-flags-size filter))))))
  389. (bytevector-add-crc32! bv crc32)
  390. bv))
  391. (define encode-xz-block-header
  392. (make-thing-encoder xz-block-header->bytevector))
  393. (define-immutable-record-type <xz-index-record>
  394. (make-xz-index-record unpadded-size uncompressed-size)
  395. xz-index-record?
  396. ;; An (XZ) integer.
  397. (unpadded-size xz-index-record-unpadded-size)
  398. ;; An (XZ) integer.
  399. (uncompressed-size xz-index-record-uncompressed-size))
  400. (define (xz-index-record-size rd)
  401. (+ (xz-integer-length (xz-index-record-unpadded-size rd))
  402. (xz-integer-length (xz-index-record-uncompressed-size rd))))
  403. (define (xz-index-record-block-size record)
  404. (let ((up-size (xz-index-record-unpadded-size record)))
  405. (+ up-size (padding-delta up-size 4))))
  406. (define (xz-index-records->blocks-size rds)
  407. "Compute the sum of the block sizes from the XZ index records RDS."
  408. (define block-sizes
  409. (map (lambda (rd)
  410. (let ((s (xz-index-record-unpadded-size rd)))
  411. (+ s (padding-delta s 4))))
  412. rds))
  413. (reduce + 0 block-sizes))
  414. (define* (decode-xz-index-record bv #:optional (start 0)
  415. (end (bytevector-length bv)))
  416. (let ((middle (find-xz-integer-end bv start end)))
  417. (make-xz-index-record
  418. (decode-xz-integer bv start middle)
  419. (decode-xz-integer bv middle end))))
  420. (define (xz-index-record->bytevector rd)
  421. (let ((up-size (xz-index-record-unpadded-size rd))
  422. (uc-size (xz-index-record-uncompressed-size rd)))
  423. (bytevector-append (encode-xz-integer up-size)
  424. (encode-xz-integer uc-size))))
  425. (define encode-xz-index-record
  426. (make-thing-encoder xz-index-record->bytevector))
  427. (define-immutable-record-type <xz-index>
  428. (make-xz-index records crc32)
  429. xz-index?
  430. ;; A list of <xz-index-record>.
  431. (records xz-index-records)
  432. ;; A four-byte number or #f.
  433. (crc32 xz-index-crc32))
  434. (define (xz-index-size idx)
  435. (let* ((records (xz-index-records idx))
  436. (base (+ 5 (xz-integer-length (length records))
  437. (reduce + 0 (map xz-index-record-size records)))))
  438. (+ base (padding-delta base 4))))
  439. (define (padding-delta n padding)
  440. (let ((r (modulo n padding)))
  441. (if (zero? r) 0 (- padding r))))
  442. (define* (decode-xz-index bv #:optional (start 0)
  443. (end (bytevector-length bv)))
  444. (unless (zero? (bytevector-u8-ref bv start))
  445. (error "Invalid XZ index indicator."))
  446. (let* ((c-end (find-xz-integer-end bv (1+ start) end))
  447. (count (decode-xz-integer bv (1+ start) c-end)))
  448. (call-with-values
  449. (lambda ()
  450. (let loop ((k c-end) (j 0) (acc '()))
  451. (if (>= j count)
  452. (values k (reverse acc))
  453. (let* ((ir-mid (find-xz-integer-end bv k end))
  454. (ir-end (find-xz-integer-end bv ir-mid end))
  455. (ir (decode-xz-index-record bv k ir-end)))
  456. (loop ir-end (1+ j) (cons ir acc))))))
  457. (lambda (rs-end records)
  458. (define p-end (+ rs-end (padding-delta rs-end 4)))
  459. (unless (= p-end (- end 4))
  460. (error "Invalid XZ index size."))
  461. (let* ((crc32* (bytevector-u32-ref bv p-end 'little))
  462. (crc32 (if (= (bytevector-crc32 bv start (- end 4)) crc32*)
  463. #f
  464. crc32*)))
  465. (unless (bytevector-zero? bv rs-end p-end)
  466. (error "Invalid XZ index padding."))
  467. (make-xz-index records crc32))))))
  468. (define (xz-index->bytevector idx)
  469. (let* ((records (xz-index-records idx))
  470. (count (length records))
  471. (count-size (xz-integer-length count))
  472. (records-size (reduce + 0 (map xz-index-record-size records)))
  473. (raw-size (+ 6 records-size))
  474. (size (+ raw-size (padding-delta raw-size 4)))
  475. (bv (make-bytevector size 0)))
  476. (encode-xz-integer count bv 1)
  477. (let loop ((records records) (k (1+ count-size)))
  478. (match records
  479. (() *unspecified*)
  480. ((rd . rest)
  481. (encode-xz-index-record rd bv k)
  482. (loop rest (+ k (xz-index-record-size rd))))))
  483. (bytevector-add-crc32! bv (xz-index-crc32 idx))
  484. bv))
  485. (define encode-xz-index
  486. (make-thing-encoder xz-index->bytevector))
  487. (define (bytevector-rfind-footer-magic-bytes bv)
  488. "Find the last occurance of the XZ stream footer magic bytes in BV."
  489. (let loop ((k (- (bytevector-length bv) 2)))
  490. (and (not (negative? k))
  491. (or (and (magic-bytes? magic-footer-bytes bv k) k)
  492. (loop (1- k))))))
  493. (define (seek-back-to-xz-stream-footer port)
  494. "Search PORT backwards for the beginning an XZ stream footer"
  495. (define bv (make-bytevector 12))
  496. (let loop ((k (- (ftell port) 12)))
  497. (cond
  498. ((< k 0) #f)
  499. (else
  500. (seek port k SEEK_SET)
  501. (get-bytevector-n! port bv 0 12)
  502. (let ((j (bytevector-rfind-footer-magic-bytes bv)))
  503. (unless (bytevector-zero? bv (if j (+ j 2) 0))
  504. (error "Invalid XZ stream padding."))
  505. (if j
  506. (seek port (- (+ k j) 10) SEEK_SET)
  507. (loop (- k 11))))))))
  508. (define-immutable-record-type <xz-stream>
  509. (make-xz-stream header blocks index footer)
  510. xz-stream?
  511. (header xz-stream-header)
  512. (blocks xz-stream-blocks)
  513. (index xz-stream-index)
  514. (footer xz-stream-footer))
  515. (define (xz-stream-size strm)
  516. (let* ((index (xz-stream-index strm))
  517. (records (xz-index-records index))
  518. (index-size (xz-index-size index))
  519. (blocks-size (xz-index-records->blocks-size records))
  520. (size (+ 24 blocks-size index-size)))
  521. (+ size (padding-delta size 4))))
  522. (define (xz-stream-uncompressed-size strm)
  523. (reduce + 0 (map xz-index-record-uncompressed-size
  524. (xz-index-records (xz-stream-index strm)))))
  525. (define (xz-stream-block-bounds strm k)
  526. "Return the offset and size (as two values) of the Kth block in XZ
  527. stream STRM."
  528. (let loop ((records (xz-index-records (xz-stream-index strm)))
  529. (j 0)
  530. (offset 0))
  531. (match records
  532. (() (scm-error 'out-of-range 'xz-stream-block-bounds
  533. "Bad XZ stream block index ~A"
  534. (list k) (list k)))
  535. ((record . rest)
  536. (let ((size (xz-index-record-block-size record)))
  537. (if (= j k)
  538. (values offset size)
  539. (loop rest (1+ j) (+ offset size))))))))
  540. (define (read-xz-block-headers port records)
  541. (let loop ((records records) (acc '()))
  542. (match records
  543. (() (reverse acc))
  544. ((record . rest)
  545. (let* ((raw-header-size (get-u8 port))
  546. (header-size (* (1+ raw-header-size) 4))
  547. (bv (make-bytevector header-size)))
  548. (bytevector-u8-set! bv 0 raw-header-size)
  549. (get-bytevector-n! port bv 1 (1- header-size))
  550. (let ((b-header (decode-xz-block-header bv))
  551. (size (xz-index-record-block-size record)))
  552. (seek port (- size header-size) SEEK_CUR)
  553. (loop rest (cons b-header acc))))))))
  554. (define (read-xz-stream-from-footer port)
  555. (let* ((footer-bv (get-bytevector-n port 12))
  556. (footer (decode-xz-stream-footer footer-bv))
  557. (index-size (xz-stream-footer-backward-size footer)))
  558. (seek port (- 0 12 index-size) SEEK_CUR)
  559. (let* ((index-bv (get-bytevector-n port index-size))
  560. (index (decode-xz-index index-bv))
  561. (records (xz-index-records index))
  562. (blocks-size (xz-index-records->blocks-size records)))
  563. (seek port (- 0 index-size blocks-size 12) SEEK_CUR)
  564. (let* ((position (ftell port))
  565. (header-bv (get-bytevector-n port 12))
  566. (header (decode-xz-stream-header header-bv))
  567. (b-headers (read-xz-block-headers port records)))
  568. (seek port position SEEK_SET)
  569. (make-xz-stream header b-headers index footer)))))
  570. (define (xz-stream-fold-right kons knil port)
  571. (seek port 0 SEEK_END)
  572. (let loop ((acc knil))
  573. (seek-back-to-xz-stream-footer port)
  574. (let ((result (kons (read-xz-stream-from-footer port) acc)))
  575. (if (zero? (ftell port))
  576. result
  577. (loop result)))))
  578. (define (read-xz-streams port)
  579. (xz-stream-fold-right cons '() port))