tar_fix.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. #!/usr/bin/env python3
  2. import tarfile
  3. import os
  4. class Tarball:
  5. def __init__(self, infile, outfile):
  6. self.infile = infile
  7. self.outfile = outfile
  8. def drop_lead_comp(self):
  9. """Removes leading path component (top-level dir)
  10. from input tar file."""
  11. with tarfile.open(self.infile) as tarin, tarfile.open(self.outfile, 'w:gz') as tarout:
  12. # Identify common top-level dir for all tarball
  13. # components, and proceed if it's set.
  14. lead_comp_name = os.path.commonpath(tarin.getnames())
  15. if lead_comp_name:
  16. prefix_len = len(lead_comp_name + '/')
  17. # Remove top-level dir (eg. "root.x86_64" or "root.i686"
  18. # in Hyperbola bootstrap tarballs) from the archive.
  19. tarin.members.remove(tarin.getmember(lead_comp_name))
  20. for m in tarin.members:
  21. # Drop top-level dir prefix in all tarball
  22. # component's paths.
  23. m.path = m.path[prefix_len:]
  24. # If component is a link, don't fetch its content.
  25. # There's no point to that, and it helps avoiding
  26. # KeyError("linkname 'something' not found") on "broken"
  27. # symlinks, which are perfectly normal in a
  28. # root FS tarball. And for hard links, the link
  29. # target needs to be stripped of the prefix same as
  30. # the file name.
  31. if m.linkname:
  32. if m.islnk():
  33. m.linkname = m.linkname[prefix_len:]
  34. tarout.addfile(m)
  35. else:
  36. tarout.addfile(m, tarin.extractfile(m))
  37. if __name__ == '__main__':
  38. import argparse
  39. parser = argparse.ArgumentParser(
  40. description="Remove leading path component from input tarball contents and save "
  41. "the result in output tarball.", add_help=False)
  42. group = parser.add_argument_group("Arguments")
  43. group.add_argument("--help", action='store_true',
  44. help="Show this help message and exit.")
  45. args = parser.parse_known_args()
  46. group.add_argument("--input", metavar='PATH', dest='infile',
  47. type=str, help="Input tar[.gz/xz/bz2] file path.",
  48. required=True)
  49. group.add_argument("--output", metavar='PATH', dest='outfile',
  50. type=str, help="Output tar.gz file path.",
  51. required=True)
  52. if args[0].help:
  53. parser.exit(parser.print_help())
  54. else:
  55. args = parser.parse_args()
  56. tarball = Tarball(args.infile, args.outfile)
  57. tarball.drop_lead_comp()
  58. # An error handling attempt I would like to remember. Note to self: it skips symlinks altogether.
  59. #
  60. # # Handle broken symlinks. They are perfectly normal in a root fs tarball, but tarfile module is not
  61. # # prepared for that. Trying hard not to catch anything other than "linkname 'something' not found".
  62. # try:
  63. # # Write each modified component to output tarball.
  64. # tarout.addfile(m, tarin.extractfile(m))
  65. #
  66. # except KeyError as error:
  67. # if "linkname '" and "' not found" in str(error):
  68. # print("Warning: the input tarball contains a dead symlink: '%s' to non-existent '%s'. No "
  69. # "biggy, but you might want to know. It will be included in the output tarball as it "
  70. # "is. Proceeding..." % (m.name, m.linkname), file=sys.stderr)
  71. # else:
  72. # raise
# And a list comprehension over all tar members:
  74. #
  75. # [m.path[prefix_len:] for m in tarin.members]