tar-extension.scm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354
  1. ;;; Disarchive
  2. ;;; Copyright © 2020, 2021 Timothy Sample <samplet@ngyro.com>
  3. ;;;
  4. ;;; This file is part of Disarchive.
  5. ;;;
  6. ;;; Disarchive is free software: you can redistribute it and/or modify
  7. ;;; it under the terms of the GNU General Public License as published by
  8. ;;; the Free Software Foundation, either version 3 of the License, or
  9. ;;; (at your option) any later version.
  10. ;;;
  11. ;;; Disarchive is distributed in the hope that it will be useful,
  12. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. ;;; GNU General Public License for more details.
  15. ;;;
  16. ;;; You should have received a copy of the GNU General Public License
  17. ;;; along with Disarchive. If not, see <http://www.gnu.org/licenses/>.
  18. (define-module (disarchive kinds tar-extension)
  19. #:use-module (disarchive kinds binary-string)
  20. #:use-module (disarchive kinds tar-header) ; recursive
  21. #:use-module (disarchive kinds zero-string)
  22. #:use-module (disarchive serialization)
  23. #:use-module (disarchive utils)
  24. #:use-module (gcrypt base64)
  25. #:use-module (ice-9 binary-ports)
  26. #:use-module (ice-9 iconv)
  27. #:use-module (ice-9 match)
  28. #:use-module (rnrs bytevectors)
  29. #:use-module ((rnrs io ports) #:select (port-eof?))
  30. #:use-module (srfi srfi-1)
  31. #:use-module (srfi srfi-9)
  32. #:use-module (srfi srfi-71)
  33. #:export (<tar-extension>
  34. make-tar-extension
  35. tar-extension?
  36. tar-extension-header
  37. tar-extension-content
  38. tar-extension-padding
  39. -tar-extension-
  40. valid-pax-records?
  41. decode-pax-records
  42. encode-pax-records
  43. -pax-records-
  44. valid-gnu-path?
  45. decode-gnu-path
  46. encode-gnu-path
  47. -gnu-path-
  48. valid-gnu-linkpath?
  49. decode-gnu-linkpath
  50. encode-gnu-linkpath
  51. -gnu-link-path-
  52. typeflag-validator
  53. typeflag-decoder
  54. typeflag-encoder
  55. typeflag-serializer))
  56. ;;; Commentary:
  57. ;;;
  58. ;;; Certain tarball headers are "extension headers" and they may be
  59. ;;; followed by one or more records. This module contains a record type
  60. ;;; for representing extensions as well as procedures for encoding and
  61. ;;; decoding them.
  62. ;;;
  63. ;;; Code:
  64. (define ascii-lf 10)
  65. (define ascii-space 32)
  66. (define ascii-= 61)
  67. (define-record-type <tar-extension>
  68. (make-tar-extension header content)
  69. tar-extension?
  70. ;; (or (? tar-header?) #f)
  71. (header tar-extension-header)
  72. ;; This outer "or" distinguishes between GNU records and pax records.
  73. ;; (or (((? string?) . (? zero-string?)))
  74. ;; ((or ((? binary-string?) . (? binary-string?))
  75. ;; (? bytevector?)) ...))
  76. (content tar-extension-content))
  77. ;; Validators
  78. (define (valid-pax-records? records)
  79. (define (no-=-binary-string? str)
  80. (match str
  81. ((? string?) (not (string-index str #\=)))
  82. ((? bytevector?) (not (let loop ((k 0))
  83. (and (< k (bytevector-length str))
  84. (or (= (bytevector-u8-ref str k) ascii-=)
  85. (loop (1+ k)))))))
  86. (_ #f)))
  87. (define (key? obj)
  88. (and (valid-binary-string? obj)
  89. (no-=-binary-string? obj)))
  90. (define (key+value? obj)
  91. (match obj
  92. (((? key?) . (? valid-binary-string?)) #t)
  93. (_ #f)))
  94. (match records
  95. (((or (? key+value?) (? bytevector?)) ...) #t)
  96. (_ #f)))
  97. (define (make-gnu-validator name)
  98. (define (name? x) (and (string? x) (string=? x name)))
  99. (match-lambda
  100. ((((? name?) . (? valid-zero-string?))) #t)
  101. (_ #f)))
  102. (define valid-gnu-path? (make-gnu-validator "path"))
  103. (define valid-gnu-linkpath? (make-gnu-validator "linkpath"))
  104. ;; Extension records in pax format
  105. (define* (get-pax-length bv #:optional (start 0)
  106. (end (bytevector-length bv)))
  107. "Read a pax record length from BV and return two values: the index
  108. where the length ends and the length itself. Optionally, START and END
  109. indexes can be provided to read from only a part of BV."
  110. (define (ascii-number? b)
  111. (and (<= 48 b) (<= b 57)))
  112. (define (blank? b)
  113. (= b 32))
  114. (define (decimal-list->number xs)
  115. (let loop ((xs xs) (k 0) (acc 0))
  116. (match xs
  117. (() acc)
  118. ((x . xs) (loop xs (1+ k) (+ acc (* (- x 48) (expt 10 k))))))))
  119. (let loop ((k start) (acc '()))
  120. (if (>= k end)
  121. (values k (decimal-list->number acc))
  122. (match (bytevector-u8-ref bv k)
  123. ((? ascii-number? b) (loop (1+ k) (cons b acc)))
  124. ((? blank? b) (values (1+ k) (decimal-list->number acc)))
  125. (_ (values start #f))))))
  126. (define* (get-pax-key+value bv #:optional (start 0)
  127. (end (bytevector-length bv)))
  128. "Read a pax record key-value-pair from BV. Optionally, START and END
  129. indexes can be provided to read from only a part of BV."
  130. (and (> end start)
  131. (= (bytevector-u8-ref bv (1- end)) ascii-lf)
  132. (match (bytevector-index bv ascii-= start end)
  133. (#f #f)
  134. (idx (cons (decode-binary-string bv start idx)
  135. (decode-binary-string bv (1+ idx) (1- end)))))))
  136. (define* (get-pax-record bv #:optional (start 0)
  137. (end (bytevector-length bv)))
  138. "Read a pax record from BV and return two values: the index where the
  139. record ends and the record itself. Optionally, START and END indexes
  140. can be provided to read from only a part of BV."
  141. (let* ((rstart length (get-pax-length bv start end))
  142. (rend (and length (+ start length))))
  143. (if (and rend (<= rstart rend end))
  144. (values rend
  145. (or (get-pax-key+value bv rstart rend)
  146. (sub-bytevector bv start rend)))
  147. (values end (sub-bytevector bv start end)))))
  148. (define* (decode-pax-records bv #:optional (start 0)
  149. (end (bytevector-length bv)))
  150. "Decode the contents of the bytevector BV as a list of pax extension
  151. records. Optionally, START and END indexes can be provided to decode
  152. only a part of BV."
  153. (let loop ((k start) (acc '()))
  154. (if (>= k end)
  155. (reverse acc)
  156. (let ((next-k record (get-pax-record bv k end)))
  157. (loop next-k (cons record acc))))))
  158. (define (pax-record->bytevector record)
  159. "Convert the pax extension record"
  160. (define digit-count (compose inexact->exact 1+ floor log10))
  161. (match record
  162. ((key . value)
  163. (let* ((bkey (encode-binary-string key))
  164. (bvalue (encode-binary-string value))
  165. ;; There are three delimiters to account for.
  166. (n (+ 3 (bytevector-length bkey) (bytevector-length bvalue)))
  167. ;; We have to include the length of the length, too.
  168. (len (+ n (digit-count (+ n (digit-count n))))))
  169. (bytevector-append (string->utf8 (number->string len))
  170. #vu8(32) bkey #vu8(61) bvalue #vu8(10))))
  171. ((? bytevector?) record)
  172. (_ (scm-error 'misc-error 'pax-record->bytevector
  173. (string-append "Invalid pax extension record: ~A")
  174. (list record) (list record)))))
  175. (define encode-pax-records
  176. (case-lambda
  177. "Encode the pax extension records RECORDS. If BV is set, the result
  178. will be written into BV. Otherwise, the result will be written into a
  179. new bytevector. If you are providing a bytevector, you can also provide
  180. START and END indexes to control where the result is written."
  181. ((records)
  182. (apply bytevector-append (map pax-record->bytevector records)))
  183. ((records bv)
  184. (encode-pax-records records bv 0 (bytevector-length bv)))
  185. ((records bv start)
  186. (encode-pax-records records bv start (bytevector-length bv)))
  187. ((records bv start end)
  188. (let* ((brecords (encode-pax-records records))
  189. (brecords-len (bytevector-length brecords))
  190. (space (- end start))
  191. (leftover-space (- brecords-len space)))
  192. (bytevector-copy! brecords 0 bv start (min brecords-len space))
  193. (when (positive? leftover-space)
  194. (bytevector-fill!* bv 0 end leftover-space))))))
  195. ;; Extension records in GNU format
  196. (define (make-gnu-decoder name)
  197. "Create a decoder procedure for decoding GNU extension records with
  198. field name NAME."
  199. (lambda* (bv #:optional (start 0) (end (bytevector-length bv)))
  200. `((,name . ,(decode-zero-string bv start end)))))
  201. (define (make-gnu-encoder name)
  202. "Create an encoder procedure for encoding GNU extension records with
  203. field name NAME."
  204. (define (name? x) (string=? name x))
  205. (lambda* (records #:optional bv (start 0) end)
  206. (match records
  207. ((((? name?) . value))
  208. (encode-zero-string value bv start end))
  209. (_ (scm-error 'misc-error 'make-gnu-encoder
  210. (string-append "Invalid tar extension records: ~A")
  211. (list records) (list records))))))
  212. (define decode-gnu-path (make-gnu-decoder "path"))
  213. (define encode-gnu-path (make-gnu-encoder "path"))
  214. (define decode-gnu-linkpath (make-gnu-decoder "linkpath"))
  215. (define encode-gnu-linkpath (make-gnu-encoder "linkpath"))
  216. ;; Codec lookup
  217. (define (typeflag-decoder typeflag)
  218. "Find a decoder for the tarball typeflag TYPEFLAG."
  219. (cond
  220. ((or (= typeflag (char->integer #\g))
  221. (= typeflag (char->integer #\x)))
  222. decode-pax-records)
  223. ((= typeflag (char->integer #\L))
  224. decode-gnu-path)
  225. ((= typeflag (char->integer #\K))
  226. decode-gnu-linkpath)))
  227. (define (typeflag-encoder typeflag)
  228. "Find an encoder for the tarball typeflag TYPEFLAG."
  229. (cond
  230. ((or (= typeflag (char->integer #\g))
  231. (= typeflag (char->integer #\x)))
  232. encode-pax-records)
  233. ((= typeflag (char->integer #\L))
  234. encode-gnu-path)
  235. ((= typeflag (char->integer #\K))
  236. encode-gnu-linkpath)))
  237. ;; Serialization
  238. (define (pax-records->sexp records)
  239. (map (match-lambda
  240. (((? binary-string? key) . (? binary-string? value))
  241. (cons (serialize -binary-string- key #f)
  242. (serialize -binary-string- value #f)))
  243. ((? bytevector? bv)
  244. (base64-encode bv)))
  245. records))
  246. (define (sexp->pax-records obj)
  247. (map (match-lambda
  248. ((key . value) (cons (deserialize -binary-string- key #f)
  249. (deserialize -binary-string- value #f)))
  250. (b64 (base64-decode b64)))
  251. obj))
  252. (define -pax-records-
  253. (make-serializer
  254. (lambda (records _) (pax-records->sexp records))
  255. (lambda (obj _) (sexp->pax-records obj))))
  256. (define (make-gnu-serializer name)
  257. (define (name? x) (string=? x name))
  258. (make-serializer
  259. (lambda (records _)
  260. (match records
  261. ((((? name? key) . (? zero-string? value)))
  262. `((,key . ,(serialize -zero-string- value #f))))))
  263. (lambda (obj _)
  264. (match obj
  265. ((((? name? key) . value))
  266. `((,key . ,(deserialize -zero-string- value #f))))))))
  267. (define -gnu-path- (make-gnu-serializer "path"))
  268. (define -gnu-linkpath- (make-gnu-serializer "linkpath"))
  269. (define (typeflag-serializer typeflag)
  270. "Find a serializer for the tarball typeflag TYPEFLAG."
  271. (cond
  272. ((or (= typeflag (char->integer #\g))
  273. (= typeflag (char->integer #\x)))
  274. -pax-records-)
  275. ((= typeflag (char->integer #\L))
  276. -gnu-path-)
  277. ((= typeflag (char->integer #\K))
  278. -gnu-linkpath-)))
  279. (define (tar-extension->sexp ext)
  280. (match ext
  281. (($ <tar-extension> header content)
  282. (let ((-content- (if header
  283. (typeflag-serializer (tar-header-typeflag header))
  284. ;; XXX: Here, we assume that no header means a
  285. ;; pax global extension. If we ever move
  286. ;; beyond pax and GNU, this may be a bad idea.
  287. -pax-records-)))
  288. `((header . ,(serialize -tar-header- header #f))
  289. (content . ,(serialize -content- content #f)))))
  290. (_ (scm-error 'wrong-type-arg 'tar-extension->sexp
  291. (string-append "Wrong type argument in position 1 "
  292. "(expecting tar-extension): ~A")
  293. (list ext) (list ext)))))
  294. (define (sexp->tar-extension obj)
  295. (match obj
  296. ((('header . header-obj)
  297. ('content . content-obj))
  298. (let* ((header (and header-obj (deserialize -tar-header- header-obj #f)))
  299. (-content- (if header
  300. (typeflag-serializer (tar-header-typeflag header))
  301. ;; XXX: See comment in 'tar-extension->sexp'
  302. ;; for why this is dubious.
  303. -pax-records-)))
  304. (make-tar-extension header
  305. (deserialize -content- content-obj #f))))
  306. (_ (scm-error 'misc-error 'sexp->tar-extension
  307. (string-append "Invalid tar extension S-exp: ~A")
  308. (list obj) (list obj)))))
  309. (define -tar-extension-
  310. (make-serializer
  311. (lambda (ext _) (tar-extension->sexp ext))
  312. (lambda (obj _) (sexp->tar-extension obj))))