checksums.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. #!/usr/bin/python
  2. # This Source Code Form is subject to the terms of the Mozilla Public
  3. # License, v. 2.0. If a copy of the MPL was not distributed with this
  4. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
  5. from __future__ import with_statement
  6. from optparse import OptionParser
  7. import logging
  8. import os
  9. try:
  10. import hashlib
  11. except:
  12. hashlib = None
  13. def digest_file(filename, digest, chunk_size=1024):
  14. '''Produce a checksum for the file specified by 'filename'. 'filename'
  15. is a string path to a file that is opened and read in this function. The
  16. checksum algorithm is specified by 'digest' and is a valid OpenSSL
  17. algorithm. If the digest used is not valid or Python's hashlib doesn't
  18. work, the None object will be returned instead. The size of blocks
  19. that this function will read from the file object it opens based on
  20. 'filename' can be specified by 'chunk_size', which defaults to 1K'''
  21. assert not os.path.isdir(filename), 'this function only works with files'
  22. logger = logging.getLogger('checksums.py')
  23. if hashlib is not None:
  24. logger.debug('Creating new %s object' % digest)
  25. h = hashlib.new(digest)
  26. with open(filename, 'rb') as f:
  27. while True:
  28. data = f.read(chunk_size)
  29. if not data:
  30. logger.debug('Finished reading in file')
  31. break
  32. h.update(data)
  33. hash = h.hexdigest()
  34. logger.debug('Hash for %s is %s' % (filename, hash))
  35. return hash
  36. else:
  37. # In this case we could subprocess.Popen and .communicate with
  38. # sha1sum or md5sum
  39. logger.warn('The python module for hashlib is missing!')
  40. return None
  41. def process_files(files, output_filename, digests, strip):
  42. '''This function takes a list of file names, 'files'. It will then
  43. compute the checksum for each of the files by opening the files.
  44. Once each file is read and its checksum is computed, this function
  45. will write the information to the file specified by 'output_filename'.
  46. The path written in the output file will have anything specified by 'strip'
  47. removed from the path. The output file is closed before returning nothing
  48. The algorithm to compute checksums with can be specified by 'digests'
  49. and needs to be a list of valid OpenSSL algorithms.
  50. The output file is written in the format:
  51. <hash> <algorithm> <filesize> <filepath>
  52. Example:
  53. d1fa09a<snip>e4220 sha1 14250744 firefox-4.0b6pre.en-US.mac64.dmg
  54. '''
  55. logger = logging.getLogger('checksums.py')
  56. if os.path.exists(output_filename):
  57. logger.debug('Overwriting existing checksums file "%s"' %
  58. output_filename)
  59. else:
  60. logger.debug('Creating a new checksums file "%s"' % output_filename)
  61. with open(output_filename, 'w+') as output:
  62. for file in files:
  63. if os.path.isdir(file):
  64. logger.warn('%s is a directory, skipping' % file)
  65. else:
  66. for digest in digests:
  67. hash = digest_file(file, digest)
  68. if hash is None:
  69. logger.warn('Unable to generate a hash for %s. ' +
  70. 'Skipping.' % file)
  71. continue
  72. if file.startswith(strip):
  73. short_file = file[len(strip):]
  74. short_file = short_file.lstrip('/')
  75. else:
  76. short_file = file
  77. print >>output, '%s %s %s %s' % (hash, digest,
  78. os.path.getsize(file),
  79. short_file)
  80. def setup_logging(level=logging.DEBUG):
  81. '''This function sets up the logging module using a speficiable logging
  82. module logging level. The default log level is DEBUG.
  83. The output is in the format:
  84. <level> - <message>
  85. Example:
  86. DEBUG - Finished reading in file
  87. '''
  88. logger = logging.getLogger('checksums.py')
  89. logger.setLevel(logging.DEBUG)
  90. handler = logging.StreamHandler()
  91. handler.setLevel(level)
  92. formatter = logging.Formatter("%(levelname)s - %(message)s")
  93. handler.setFormatter(formatter)
  94. logger.addHandler(handler)
  95. def main():
  96. '''This is a main function that parses arguments, sets up logging
  97. and generates a checksum file'''
  98. # Parse command line arguments
  99. parser = OptionParser()
  100. parser.add_option('-d', '--digest', help='checksum algorithm to use',
  101. action='append', dest='digests')
  102. parser.add_option('-o', '--output', help='output file to use',
  103. action='store', dest='outfile', default='checksums')
  104. parser.add_option('-v', '--verbose',
  105. help='Be noisy (takes precedence over quiet)',
  106. action='store_true', dest='verbose', default=False)
  107. parser.add_option('-q', '--quiet', help='Be quiet', action='store_true',
  108. dest='quiet', default=False)
  109. parser.add_option('-s', '--strip',
  110. help='strip this path from the filenames',
  111. dest='strip', default=os.getcwd())
  112. options, args = parser.parse_args()
  113. #Figure out which logging level to use
  114. if options.verbose:
  115. loglevel = logging.DEBUG
  116. elif options.quiet:
  117. loglevel = logging.ERROR
  118. else:
  119. loglevel = logging.INFO
  120. #Set up logging
  121. setup_logging(loglevel)
  122. logger = logging.getLogger('checksums.py')
  123. # Validate the digest type to use
  124. if not options.digests:
  125. options.digests = ['sha1']
  126. try:
  127. for digest in options.digests:
  128. hashlib.new(digest)
  129. except ValueError, ve:
  130. logger.error('Could not create a "%s" hash object (%s)' %
  131. (digest, ve.args[0]))
  132. exit(1)
  133. # Validate the files to checksum
  134. files = []
  135. for i in args:
  136. if os.path.exists(i):
  137. files.append(i)
  138. else:
  139. logger.info('File "%s" was not found on the filesystem' % i)
  140. process_files(files, options.outfile, options.digests, options.strip)
  141. if __name__ == '__main__':
  142. main()