extract.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. # Extract files from archives.
  2. from os import O_CREAT, O_WRONLY, fdopen, mkdir, open as osopen, utime
  3. try:
  4. from os import O_BINARY
  5. except ImportError:
  6. # Platforms that do not define O_BINARY do not need it either.
  7. O_BINARY = 0
  8. try:
  9. from os import symlink
  10. except ImportError:
  11. def symlink(source, link_name):
  12. raise OSError('OS does not support symlink creation')
  13. from os.path import abspath, isdir, join as joinpath, sep, split as splitpath
  14. from stat import S_IRWXU, S_IRWXG, S_IRWXO, S_IXUSR, S_IXGRP, S_IXOTH
  15. from tarfile import TarFile
  16. import sys
  17. from detectsys import detectOS
  18. hostOS = detectOS()
  19. # Note: Larger buffers might make extraction slower.
  20. bufSize = 16384
  21. def extract(archivePath, destDir, rename = None):
  22. '''Extract the given archive to the given directory.
  23. If a rename function is given, it is called with the output path relative
  24. to the destination directory; the value returned by the rename function is
  25. used as the actual relative destination file path.
  26. This function sets file ownership and permissions like is done in newly
  27. created files and ignores the ownership and permissions from the archive,
  28. since we are not restoring a backup.
  29. '''
  30. absDestDir = abspath(destDir) + sep
  31. if not isdir(absDestDir):
  32. raise ValueError(
  33. 'Destination directory "%s" does not exist' % absDestDir
  34. )
  35. with TarFile.open(archivePath, errorlevel=2) as tar:
  36. for member in tar.getmembers():
  37. absMemberPath = abspath(joinpath(absDestDir, member.name))
  38. if member.isdir():
  39. absMemberPath += sep
  40. if not absMemberPath.startswith(absDestDir):
  41. raise ValueError(
  42. 'Refusing to extract tar entry "%s" '
  43. 'outside destination directory'
  44. % member.name
  45. )
  46. if rename:
  47. absMemberPath = absDestDir + rename(
  48. absMemberPath[len(absDestDir) : ]
  49. )
  50. if member.isfile():
  51. mode = S_IRWXU | S_IRWXG | S_IRWXO
  52. if not (member.mode & S_IXUSR):
  53. mode &= ~(S_IXUSR | S_IXGRP | S_IXOTH)
  54. fd = osopen(absMemberPath, O_CREAT | O_WRONLY | O_BINARY, mode)
  55. with fdopen(fd, 'wb') as out:
  56. inp = tar.extractfile(member)
  57. bytesLeft = member.size
  58. while bytesLeft > 0:
  59. buf = inp.read(bufSize)
  60. out.write(buf)
  61. bytesLeft -= len(buf)
  62. elif member.isdir():
  63. if not isdir(absMemberPath):
  64. mkdir(absMemberPath)
  65. elif member.issym():
  66. try:
  67. symlink(member.linkname, absMemberPath)
  68. except OSError as ex:
  69. print(
  70. 'WARNING: Skipping symlink creation: %s -> %s: %s'
  71. % (absMemberPath, member.linkname, ex)
  72. )
  73. else:
  74. raise ValueError(
  75. 'Cannot extract tar entry "%s": '
  76. 'not a regular file, symlink or directory'
  77. % member.name
  78. )
  79. # Set file/directory modification time to match the archive.
  80. # For example autotools track dependencies between archived files
  81. # and will attempt to regenerate them if the time stamps indicate
  82. # one is older than the other.
  83. # Note: Apparently Python 2.5's utime() cannot set timestamps on
  84. # directories in Windows.
  85. if member.isfile() or (
  86. member.isdir() and not hostOS.startswith('mingw')
  87. ):
  88. utime(absMemberPath, (member.mtime, member.mtime))
  89. class TopLevelDirRenamer(object):
  90. def __init__(self, newName):
  91. self.newName = newName
  92. def __call__(self, oldPath):
  93. head, tail = splitpath(oldPath)
  94. headParts = head.split(sep)
  95. if not headParts:
  96. raise ValueError(
  97. 'Directory part is empty for entry "%s"' % oldPath
  98. )
  99. headParts[0] = self.newName
  100. return sep.join(headParts + [ tail ])
  101. if __name__ == '__main__':
  102. if 3 <= len(sys.argv) <= 4:
  103. if len(sys.argv) == 4:
  104. renameTopLevelDir = TopLevelDirRenamer(sys.argv[3])
  105. else:
  106. renameTopLevelDir = None
  107. extract(sys.argv[1], sys.argv[2], renameTopLevelDir)
  108. else:
  109. print('Usage: python3 extract.py archive destination [new-top-level-dir]', file=sys.stderr)
  110. sys.exit(2)