tar-header.scm 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. ;;; Disarchive
  2. ;;; Copyright © 2020 Ludovic Courtès <ludo@gnu.org>
  3. ;;; Copyright © 2020, 2021 Timothy Sample <samplet@ngyro.com>
  4. ;;;
  5. ;;; This file is part of Disarchive.
  6. ;;;
  7. ;;; Disarchive is free software: you can redistribute it and/or modify
  8. ;;; it under the terms of the GNU General Public License as published by
  9. ;;; the Free Software Foundation, either version 3 of the License, or
  10. ;;; (at your option) any later version.
  11. ;;;
  12. ;;; Disarchive is distributed in the hope that it will be useful,
  13. ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;;; GNU General Public License for more details.
  16. ;;;
  17. ;;; You should have received a copy of the GNU General Public License
  18. ;;; along with Disarchive. If not, see <http://www.gnu.org/licenses/>.
  19. (define-module (disarchive kinds tar-header)
  20. #:use-module (disarchive kinds binary-string)
  21. #:use-module (disarchive kinds octal)
  22. #:use-module (disarchive kinds tar-extension) ; recursive
  23. #:use-module (disarchive kinds zero-string)
  24. #:use-module (disarchive serialization)
  25. #:use-module (disarchive utils)
  26. #:use-module (ice-9 binary-ports)
  27. #:use-module (ice-9 match)
  28. #:use-module (rnrs bytevectors)
  29. #:use-module (srfi srfi-1)
  30. #:use-module (srfi srfi-9)
  31. #:use-module (srfi srfi-9 gnu)
  32. #:use-module (srfi srfi-71)
  33. #:export (<tar-header>
  34. make-tar-header
  35. tar-header?
  36. tar-header-name
  37. tar-header-mode
  38. tar-header-uid
  39. tar-header-gid
  40. tar-header-size
  41. tar-header-mtime
  42. tar-header-chksum
  43. tar-header-typeflag
  44. tar-header-linkname
  45. tar-header-magic
  46. tar-header-version
  47. tar-header-uname
  48. tar-header-gname
  49. tar-header-devmajor
  50. tar-header-devminor
  51. tar-header-prefix
  52. tar-header-padding
  53. tar-header-data-padding
  54. set-tar-header-data-padding
  55. tar-header-extension
  56. set-tar-header-extension
  57. tar-header-path
  58. bytevector->tar-header
  59. tar-header->bytevector
  60. read-tar-header
  61. write-tar-header
  62. end-of-tarball-object?
  63. -tar-header-
  64. %default-default-tar-header
  65. default-tar-header))
  66. ;;; Commentary:
  67. ;;;
  68. ;;; A tar header is a record of fields that describe a file included
  69. ;;; in a tarball.
  70. ;;;
  71. ;;; Code:
  72. (define-immutable-record-type <tar-header>
  73. (make-tar-header name mode uid gid size mtime chksum typeflag
  74. linkname magic version uname gname devmajor
  75. devminor prefix padding data-padding extension)
  76. tar-header?
  77. ;; (? zero-string?)
  78. (name %tar-header-name)
  79. ;; (? octal?)
  80. (mode %tar-header-mode)
  81. ;; (? octal?)
  82. (uid %tar-header-uid)
  83. ;; (? octal?)
  84. (gid %tar-header-gid)
  85. ;; (? octal?)
  86. (size %tar-header-size)
  87. ;; (? octal?)
  88. (mtime %tar-header-mtime)
  89. ;; (? octal?)
  90. (chksum %tar-header-chksum)
  91. ;; (? byte?)
  92. (typeflag %tar-header-typeflag)
  93. ;; (? zero-string?)
  94. (linkname %tar-header-linkname)
  95. ;; (? binary-string?)
  96. (magic tar-header-magic)
  97. ;; (? binary-string?)
  98. (version tar-header-version)
  99. ;; (? zero-string?)
  100. (uname %tar-header-uname)
  101. ;; (? zero-string?)
  102. (gname %tar-header-gname)
  103. ;; (? octal?)
  104. (devmajor %tar-header-devmajor)
  105. ;; (? octal?)
  106. (devminor %tar-header-devminor)
  107. ;; (? zero-string?)
  108. (prefix %tar-header-prefix)
  109. ;; (? binary-string?)
  110. (padding tar-header-padding)
  111. ;; (? binary-string?)
  112. (data-padding tar-header-data-padding set-tar-header-data-padding)
  113. ;; (or (? tar-extension?) #f)
  114. (extension tar-header-extension set-tar-header-extension))
  115. (define tar-header-name (compose zero-string-value %tar-header-name))
  116. (define tar-header-mode (compose octal-value %tar-header-mode))
  117. (define tar-header-uid (compose octal-value %tar-header-uid))
  118. (define tar-header-gid (compose octal-value %tar-header-gid))
  119. (define tar-header-size (compose octal-value %tar-header-size))
  120. (define tar-header-mtime (compose octal-value %tar-header-mtime))
  121. (define tar-header-chksum (compose octal-value %tar-header-chksum))
  122. (define tar-header-linkname (compose zero-string-value %tar-header-linkname))
  123. (define tar-header-uname (compose zero-string-value %tar-header-uname))
  124. (define tar-header-gname (compose zero-string-value %tar-header-gname))
  125. (define tar-header-devmajor (compose octal-value %tar-header-devmajor))
  126. (define tar-header-devminor (compose octal-value %tar-header-devminor))
  127. (define tar-header-prefix (compose zero-string-value %tar-header-prefix))
  128. ;; XXX: This needs to be a procedure rather than a macro due to the
  129. ;; module dependency loop between tar-header and tar-extension.
  130. (define (tar-header-typeflag header)
  131. (%tar-header-typeflag header))
  132. (define (tar-header-path header)
  133. (or (and=> (tar-header-extension header)
  134. (lambda (extension)
  135. (any (match-lambda
  136. (("path" . (? zero-string? value))
  137. (zero-string-value value))
  138. (("path" . (? string? value))
  139. value)
  140. (_ #f))
  141. (tar-extension-content extension))))
  142. (let ((name (tar-header-name header))
  143. (prefix (tar-header-prefix header)))
  144. (if (string-null? prefix)
  145. name
  146. (string-append prefix "/" name)))))
  147. (define (bytevector->tar-header bv)
  148. (let ((name (decode-zero-string bv 0 100))
  149. (mode (decode-octal bv 100 108))
  150. (uid (decode-octal bv 108 116))
  151. (gid (decode-octal bv 116 124))
  152. (size (decode-octal bv 124 136))
  153. (mtime (decode-octal bv 136 148))
  154. (chksum (decode-octal bv 148 156))
  155. (typeflag (bytevector-u8-ref bv 156))
  156. (linkname (decode-zero-string bv 157 257))
  157. (magic (decode-binary-string bv 257 263))
  158. (version (decode-binary-string bv 263 265))
  159. (uname (decode-zero-string bv 265 297))
  160. (gname (decode-zero-string bv 297 329))
  161. (devmajor (decode-octal bv 329 337))
  162. (devminor (decode-octal bv 337 345))
  163. (prefix (decode-zero-string bv 345 500))
  164. (padding (if (bytevector-zero? bv 500 512)
  165. ""
  166. (decode-binary-string bv 500 512)))
  167. (data-padding "")
  168. (extension #f))
  169. (make-tar-header name mode uid gid size mtime chksum typeflag
  170. linkname magic version uname gname devmajor
  171. devminor prefix padding data-padding extension)))
  172. (define* (tar-header->bytevector header #:optional
  173. (bv (make-bytevector 512)))
  174. (match-let ((($ <tar-header> name mode uid gid size mtime chksum
  175. typeflag linkname magic version uname gname devmajor
  176. devminor prefix padding data-padding extension)
  177. header))
  178. (encode-zero-string name bv 0 100)
  179. (encode-octal mode bv 100 108)
  180. (encode-octal uid bv 108 116)
  181. (encode-octal gid bv 116 124)
  182. (encode-octal size bv 124 136)
  183. (encode-octal mtime bv 136 148)
  184. (encode-octal chksum bv 148 156)
  185. (bytevector-u8-set! bv 156 typeflag)
  186. (encode-zero-string linkname bv 157 257)
  187. (encode-binary-string magic bv 257 263)
  188. (encode-binary-string version bv 263 265)
  189. (encode-zero-string uname bv 265 297)
  190. (encode-zero-string gname bv 297 329)
  191. (encode-octal devmajor bv 329 337)
  192. (encode-octal devminor bv 337 345)
  193. (encode-zero-string prefix bv 345 500)
  194. (encode-binary-string padding bv 500 512)
  195. bv))
  196. (define self-extension-header?
  197. (let ((pax-global-extended-header (char->integer #\g)))
  198. (lambda (header)
  199. "Check if the tar header HEADER is an extension header that does
  200. not extend another tar header but rather extends itself."
  201. (= (tar-header-typeflag header) pax-global-extended-header))))
  202. (define extension-header?
  203. (let* ((pax-extended-header (char->integer #\x))
  204. (gnu-long-name (char->integer #\L))
  205. (gnu-long-link (char->integer #\K))
  206. (extension-headers (list pax-extended-header
  207. gnu-long-name
  208. gnu-long-link)))
  209. (lambda (header)
  210. "Check if the tar header HEADER is an extension header."
  211. (memv (tar-header-typeflag header) extension-headers))))
  212. (define (tar-header-extension-typeflag header)
  213. (let* ((extension (tar-header-extension header)))
  214. (and extension
  215. (tar-header-typeflag (or (tar-extension-header extension)
  216. header)))))
  217. (define (read-header-extension port header)
  218. (let* ((size (tar-header-size header))
  219. (typeflag (tar-header-typeflag header))
  220. (decode-content (typeflag-decoder typeflag))
  221. (content (decode-content (get-bytevector-n port size)))
  222. (remainder (modulo size 512))
  223. (padding (match (and (not (zero? remainder))
  224. (get-bytevector-n port (- 512 remainder)))
  225. (#f "")
  226. ((? bytevector-zero?) "")
  227. (bv (decode-binary-string bv)))))
  228. (values (make-tar-extension
  229. (and (not (self-extension-header? header))
  230. (set-tar-header-data-padding header padding))
  231. content)
  232. padding)))
  233. (define (write-extension-content port header content)
  234. (let* ((size (tar-header-size header))
  235. (typeflag (tar-header-typeflag header))
  236. (encode-content (typeflag-encoder typeflag))
  237. (content-bv (make-bytevector size))
  238. (remainder (modulo size 512))
  239. (padding-size (if (zero? remainder) 0 (- 512 remainder)))
  240. (padding-bv (make-bytevector padding-size 0))
  241. (data-padding (tar-header-data-padding header)))
  242. (encode-content content content-bv)
  243. (put-bytevector port content-bv)
  244. (encode-binary-string data-padding padding-bv)
  245. (put-bytevector port padding-bv)))
  246. (define end-of-tarball-object (list))
  247. (define (end-of-tarball-object? obj)
  248. (eq? obj end-of-tarball-object))
  249. (define %zeros (make-bytevector 512 0))
  250. (define (read-tar-header port)
  251. (let* ((bv (get-bytevector-n port 512))
  252. (zeros? (equal? %zeros bv))
  253. (next-bv (and zeros? (get-bytevector-n port 512))))
  254. (cond
  255. ((equal? next-bv %zeros) end-of-tarball-object)
  256. (else
  257. (when next-bv
  258. (unget-bytevector port next-bv))
  259. (let ((header (bytevector->tar-header bv)))
  260. (cond
  261. ((extension-header? header)
  262. (let* ((extension padding (read-header-extension port header))
  263. (next-header (bytevector->tar-header
  264. (get-bytevector-n port 512))))
  265. (set-tar-header-extension next-header extension)))
  266. ((self-extension-header? header)
  267. (let ((extension padding (read-header-extension port header)))
  268. (set-fields header
  269. ((tar-header-extension) extension)
  270. ((tar-header-data-padding) padding))
  271. (set-tar-header-extension header extension)))
  272. (else header)))))))
  273. (define (write-tar-header port header)
  274. (match (tar-header-extension header)
  275. (#f (put-bytevector port (tar-header->bytevector header)))
  276. (($ <tar-extension> e-header content)
  277. (when e-header
  278. (put-bytevector port (tar-header->bytevector e-header))
  279. (write-extension-content port e-header content))
  280. (put-bytevector port (tar-header->bytevector header))
  281. (unless e-header
  282. (write-extension-content port header content)))))
  283. (define -tar-header-
  284. (make-record-serializer
  285. make-tar-header
  286. `((name ,%tar-header-name ,-zero-string-)
  287. (mode ,%tar-header-mode ,-octal-)
  288. (uid ,%tar-header-uid ,-octal-)
  289. (gid ,%tar-header-gid ,-octal-)
  290. (size ,%tar-header-size ,-octal-)
  291. (mtime ,%tar-header-mtime ,-octal-)
  292. (chksum ,%tar-header-chksum ,-octal-)
  293. (typeflag ,tar-header-typeflag #f)
  294. (linkname ,%tar-header-linkname ,-zero-string-)
  295. (magic ,tar-header-magic ,-binary-string-)
  296. (version ,tar-header-version ,-binary-string-)
  297. (uname ,%tar-header-uname ,-zero-string-)
  298. (gname ,%tar-header-gname ,-zero-string-)
  299. (devmajor ,%tar-header-devmajor ,-octal-)
  300. (devminor ,%tar-header-devminor ,-octal-)
  301. (prefix ,%tar-header-prefix ,-zero-string-)
  302. (padding ,tar-header-padding ,-binary-string-)
  303. (data-padding ,tar-header-data-padding ,-binary-string-)
  304. (extension ,tar-header-extension ,(delay -tar-extension-)))
  305. #:elide-first-field? #t))
  306. (define %default-default-tar-header
  307. (make-tar-header
  308. (make-zero-string #f "")
  309. (make-padded-octal #o644 7 #\0 "")
  310. (make-padded-octal 0 7 #\0 "")
  311. (make-padded-octal 0 7 #\0 "")
  312. (make-padded-octal 0 11 #\0 "")
  313. (make-padded-octal 0 11 #\0 "")
  314. (make-padded-octal #f 6 #\0 "\x00 ")
  315. (char->integer #\0)
  316. (make-zero-string "" "")
  317. "ustar\x00"
  318. "00"
  319. (make-zero-string "" "")
  320. (make-zero-string "" "")
  321. (make-padded-octal 0 7 #\0 "")
  322. (make-padded-octal 0 7 #\0 "")
  323. (make-zero-string "" "")
  324. ""
  325. #f
  326. #f))
  327. (define (default-tar-header headers)
  328. (define all-fields (record-type-fields <tar-header>))
  329. (define field-counts
  330. (make-hash-table (length all-fields)))
  331. (define (count-field header field)
  332. (let* ((accessor (record-accessor <tar-header> field))
  333. (counts (hashq-ref field-counts field))
  334. (key (accessor header))
  335. (count (hash-ref counts key 0)))
  336. (hash-set! counts key (1+ count))))
  337. (define (field-mode field)
  338. (let ((counts (hashq-ref field-counts field)))
  339. (cdr (hash-fold (lambda (value count acc)
  340. (match-let (((best-count . best-value) acc))
  341. (if (> count best-count)
  342. (cons count value)
  343. acc)))
  344. '(0 . #f)
  345. counts))))
  346. (define (undefault-fields header)
  347. (let ((mtime (%tar-header-mtime header))
  348. (size (%tar-header-size header))
  349. (chksum (%tar-header-chksum header)))
  350. (set-fields header
  351. ((%tar-header-name zero-string-value) #f)
  352. ((%tar-header-mtime) (set-octal-value mtime 0))
  353. ((%tar-header-size) (set-octal-value size 0))
  354. ((%tar-header-chksum) (set-octal-value chksum #f)))))
  355. (for-each (lambda (field)
  356. (hashq-set! field-counts field
  357. (make-hash-table (length headers))))
  358. all-fields)
  359. (for-each (lambda (header)
  360. (for-each (lambda (field)
  361. (count-field header field))
  362. all-fields))
  363. headers)
  364. (undefault-fields
  365. (apply make-tar-header
  366. (map field-mode (record-type-fields <tar-header>)))))