link_morgue 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #! /usr/bin/env python3
  2. #
  3. # Copyright (C) 2019, Ansgar Burchardt <ansgar@debian.org>
  4. # License: GPL-2+
  5. #
  6. # This program is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 2 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  18. import argparse
  19. import errno
  20. import gzip
  21. import hashlib
  22. import os
  23. import signal
  24. import sys
  25. from contextlib import contextmanager
  26. from typing import BinaryIO, Set
  27. Hashes = Set[bytes]
  28. def hash_fh(fh: BinaryIO) -> bytes:
  29. h = hashlib.sha1()
  30. buf = b"dummy"
  31. while len(buf) > 0:
  32. buf = fh.read(32768)
  33. h.update(buf)
  34. return h.hexdigest().encode('ascii')
  35. def hash_file(filename: bytes) -> bytes:
  36. with open(filename, 'rb') as fh:
  37. return hash_fh(fh)
  38. def load_hashes(path) -> Hashes:
  39. with gzip.open(path, 'rb') as fh:
  40. return set(h.strip() for h in fh)
  41. @contextmanager
  42. def IgnoreSignals():
  43. handlers = [
  44. (sig, signal.signal(sig, signal.SIG_IGN))
  45. for sig in (signal.SIGHUP, signal.SIGINT, signal.SIGTERM)
  46. ]
  47. yield
  48. for sig, handler in handlers:
  49. if handler is None:
  50. handler = SIG_DFL
  51. signal.signal(sig, handler)
  52. def replace_file(path: bytes, hash: bytes, base: bytes) -> None:
  53. target = os.path.join(base, hash[0:2], hash[2:4], hash)
  54. with IgnoreSignals():
  55. os.unlink(path)
  56. os.symlink(target, path)
  57. def keep_file(path: bytes) -> None:
  58. target = path + b".nosnapshot"
  59. with open(target, 'x') as fh:
  60. pass
  61. def process_file(path: bytes, known_hashes: Hashes, base: bytes) -> None:
  62. """
  63. Replace file `path` with a symlink below `base` if the file is
  64. known, otherwise create `{path}.nosnapshot` to avoid checking the file
  65. again later.
  66. """
  67. h = hash_file(path)
  68. if h in known_hashes:
  69. replace_file(path, h, base)
  70. else:
  71. keep_file(path)
  72. def scan_directory(path: bytes):
  73. """
  74. Returns paths to regular files in `path` and subdirectories,
  75. skipping `*.nosnapshot` and files `fn` for which `{fn}.nosnapshot`
  76. exists.
  77. """
  78. directories = []
  79. filenames = []
  80. # We do not use `os.walk` as `os.scandir` allows us to skip
  81. # symlinks without an extra `stat()` call.
  82. for entry in os.scandir(path):
  83. if entry.is_dir(follow_symlinks=False):
  84. directories.append(entry.path)
  85. elif entry.is_file(follow_symlinks=False):
  86. filenames.append(entry.path)
  87. yield from (fn for fn in filenames
  88. if fn + b".nosnapshot" not in filenames
  89. and not fn.endswith(b".nosnapshot"))
  90. for path in directories:
  91. yield from scan_directory(path)
  92. def process_directory(path: bytes, known_hashes: Hashes, base: bytes) -> None:
  93. os.chdir(path)
  94. for fn in scan_directory(b"."):
  95. process_file(fn, known_hashes, base)
  96. def run(config):
  97. known_hashes = load_hashes(config.known_hashes)
  98. process_directory(config.morguedir.encode(), known_hashes, config.farmdir.encode())
  99. def main(argv=sys.argv[1:]):
  100. parser = argparse.ArgumentParser(
  101. description="replace files in morgue with symlinks to snapshot.d.o"
  102. )
  103. parser.add_argument("--known-hashes", type=str, required=True)
  104. parser.add_argument("--farmdir", type=str, required=True)
  105. parser.add_argument("--morguedir", type=str, required=True)
  106. config = parser.parse_args(argv)
  107. run(config)
  108. if __name__ == "__main__":
  109. main()