upload.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597
  1. # Copyright (C) 2012, Ansgar Burchardt <ansgar@debian.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 2 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License along
  14. # with this program; if not, write to the Free Software Foundation, Inc.,
  15. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  16. """module to handle uploads not yet installed to the archive
  17. This module provides classes to handle uploads not yet installed to the
  18. archive. Central is the L{Changes} class which represents a changes file.
  19. It provides methods to access the included binary and source packages.
  20. """
  21. import apt_inst
  22. import apt_pkg
  23. import errno
  24. import os
  25. import re
  26. from daklib.gpg import SignedFile
  27. from daklib.regexes import *
  28. import daklib.packagelist
  29. class UploadException(Exception):
  30. pass
  31. class InvalidChangesException(UploadException):
  32. pass
  33. class InvalidBinaryException(UploadException):
  34. pass
  35. class InvalidSourceException(UploadException):
  36. pass
  37. class InvalidHashException(UploadException):
  38. def __init__(self, filename, hash_name, expected, actual):
  39. self.filename = filename
  40. self.hash_name = hash_name
  41. self.expected = expected
  42. self.actual = actual
  43. def __str__(self):
  44. return ("Invalid {0} hash for {1}:\n"
  45. "According to the control file the {0} hash should be {2},\n"
  46. "but {1} has {3}.\n"
  47. "\n"
  48. "If you did not include {1} in your upload, a different version\n"
  49. "might already be known to the archive software.") \
  50. .format(self.hash_name, self.filename, self.expected, self.actual)
  51. class InvalidFilenameException(UploadException):
  52. def __init__(self, filename):
  53. self.filename = filename
  54. def __str__(self):
  55. return "Invalid filename '{0}'.".format(self.filename)
  56. class FileDoesNotExist(UploadException):
  57. def __init__(self, filename):
  58. self.filename = filename
  59. def __str__(self):
  60. return "Refers to non-existing file '{0}'".format(self.filename)
  61. class HashedFile(object):
  62. """file with checksums
  63. """
  64. def __init__(self, filename, size, md5sum, sha1sum, sha256sum, section=None, priority=None, input_filename=None):
  65. self.filename = filename
  66. """name of the file
  67. @type: str
  68. """
  69. if input_filename is None:
  70. input_filename = filename
  71. self.input_filename = input_filename
  72. """name of the file on disk
  73. Used for temporary files that should not be installed using their on-disk name.
  74. @type: str
  75. """
  76. self.size = size
  77. """size in bytes
  78. @type: long
  79. """
  80. self.md5sum = md5sum
  81. """MD5 hash in hexdigits
  82. @type: str
  83. """
  84. self.sha1sum = sha1sum
  85. """SHA1 hash in hexdigits
  86. @type: str
  87. """
  88. self.sha256sum = sha256sum
  89. """SHA256 hash in hexdigits
  90. @type: str
  91. """
  92. self.section = section
  93. """section or C{None}
  94. @type: str or C{None}
  95. """
  96. self.priority = priority
  97. """priority or C{None}
  98. @type: str of C{None}
  99. """
  100. @classmethod
  101. def from_file(cls, directory, filename, section=None, priority=None):
  102. """create with values for an existing file
  103. Create a C{HashedFile} object that refers to an already existing file.
  104. @type directory: str
  105. @param directory: directory the file is located in
  106. @type filename: str
  107. @param filename: filename
  108. @type section: str or C{None}
  109. @param section: optional section as given in .changes files
  110. @type priority: str or C{None}
  111. @param priority: optional priority as given in .changes files
  112. @rtype: L{HashedFile}
  113. @return: C{HashedFile} object for the given file
  114. """
  115. path = os.path.join(directory, filename)
  116. with open(path, 'r') as fh:
  117. size = os.fstat(fh.fileno()).st_size
  118. hashes = apt_pkg.Hashes(fh)
  119. return cls(filename, size, hashes.md5, hashes.sha1, hashes.sha256, section, priority)
  120. def check(self, directory):
  121. """Validate hashes
  122. Check if size and hashes match the expected value.
  123. @type directory: str
  124. @param directory: directory the file is located in
  125. @raise InvalidHashException: hash mismatch
  126. """
  127. path = os.path.join(directory, self.input_filename)
  128. try:
  129. with open(path) as fh:
  130. self.check_fh(fh)
  131. except IOError as e:
  132. if e.errno == errno.ENOENT:
  133. raise FileDoesNotExist(self.input_filename)
  134. raise
  135. def check_fh(self, fh):
  136. size = os.fstat(fh.fileno()).st_size
  137. fh.seek(0)
  138. hashes = apt_pkg.Hashes(fh)
  139. if size != self.size:
  140. raise InvalidHashException(self.filename, 'size', self.size, size)
  141. if hashes.md5 != self.md5sum:
  142. raise InvalidHashException(self.filename, 'md5sum', self.md5sum, hashes.md5)
  143. if hashes.sha1 != self.sha1sum:
  144. raise InvalidHashException(self.filename, 'sha1sum', self.sha1sum, hashes.sha1)
  145. if hashes.sha256 != self.sha256sum:
  146. raise InvalidHashException(self.filename, 'sha256sum', self.sha256sum, hashes.sha256)
  147. def parse_file_list(control, has_priority_and_section, safe_file_regexp = re_file_safe, fields = ('Files', 'Checksums-Sha1', 'Checksums-Sha256')):
  148. """Parse Files and Checksums-* fields
  149. @type control: dict-like
  150. @param control: control file to take fields from
  151. @type has_priority_and_section: bool
  152. @param has_priority_and_section: Files field include section and priority
  153. (as in .changes)
  154. @raise InvalidChangesException: missing fields or other grave errors
  155. @rtype: dict
  156. @return: dict mapping filenames to L{daklib.upload.HashedFile} objects
  157. """
  158. entries = {}
  159. for line in control.get(fields[0], "").split('\n'):
  160. if len(line) == 0:
  161. continue
  162. if has_priority_and_section:
  163. (md5sum, size, section, priority, filename) = line.split()
  164. entry = dict(md5sum=md5sum, size=long(size), section=section, priority=priority, filename=filename)
  165. else:
  166. (md5sum, size, filename) = line.split()
  167. entry = dict(md5sum=md5sum, size=long(size), filename=filename)
  168. entries[filename] = entry
  169. for line in control.get(fields[1], "").split('\n'):
  170. if len(line) == 0:
  171. continue
  172. (sha1sum, size, filename) = line.split()
  173. entry = entries.get(filename, None)
  174. if entry is None:
  175. raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[1], fields[0]))
  176. if entry is not None and entry.get('size', None) != long(size):
  177. raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[1]))
  178. entry['sha1sum'] = sha1sum
  179. for line in control.get(fields[2], "").split('\n'):
  180. if len(line) == 0:
  181. continue
  182. (sha256sum, size, filename) = line.split()
  183. entry = entries.get(filename, None)
  184. if entry is None:
  185. raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[2], fields[0]))
  186. if entry is not None and entry.get('size', None) != long(size):
  187. raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[2]))
  188. entry['sha256sum'] = sha256sum
  189. files = {}
  190. for entry in entries.itervalues():
  191. filename = entry['filename']
  192. if 'size' not in entry:
  193. raise InvalidChangesException('No size for {0}.'.format(filename))
  194. if 'md5sum' not in entry:
  195. raise InvalidChangesException('No md5sum for {0}.'.format(filename))
  196. if 'sha1sum' not in entry:
  197. raise InvalidChangesException('No sha1sum for {0}.'.format(filename))
  198. if 'sha256sum' not in entry:
  199. raise InvalidChangesException('No sha256sum for {0}.'.format(filename))
  200. if safe_file_regexp is not None and not safe_file_regexp.match(filename):
  201. raise InvalidChangesException("{0}: References file with unsafe filename {1}.".format(self.filename, filename))
  202. f = files[filename] = HashedFile(**entry)
  203. return files
  204. class Changes(object):
  205. """Representation of a .changes file
  206. """
  207. def __init__(self, directory, filename, keyrings, require_signature=True):
  208. if not re_file_safe.match(filename):
  209. raise InvalidChangesException('{0}: unsafe filename'.format(filename))
  210. self.directory = directory
  211. """directory the .changes is located in
  212. @type: str
  213. """
  214. self.filename = filename
  215. """name of the .changes file
  216. @type: str
  217. """
  218. data = open(self.path).read()
  219. self._signed_file = SignedFile(data, keyrings, require_signature)
  220. self.changes = apt_pkg.TagSection(self._signed_file.contents)
  221. """dict to access fields of the .changes file
  222. @type: dict-like
  223. """
  224. self._binaries = None
  225. self._source = None
  226. self._files = None
  227. self._keyrings = keyrings
  228. self._require_signature = require_signature
  229. @property
  230. def path(self):
  231. """path to the .changes file
  232. @type: str
  233. """
  234. return os.path.join(self.directory, self.filename)
  235. @property
  236. def primary_fingerprint(self):
  237. """fingerprint of the key used for signing the .changes file
  238. @type: str
  239. """
  240. return self._signed_file.primary_fingerprint
  241. @property
  242. def valid_signature(self):
  243. """C{True} if the .changes has a valid signature
  244. @type: bool
  245. """
  246. return self._signed_file.valid
  247. @property
  248. def signature_timestamp(self):
  249. return self._signed_file.signature_timestamp
  250. @property
  251. def contents_sha1(self):
  252. return self._signed_file.contents_sha1
  253. @property
  254. def architectures(self):
  255. """list of architectures included in the upload
  256. @type: list of str
  257. """
  258. return self.changes.get('Architecture', '').split()
  259. @property
  260. def distributions(self):
  261. """list of target distributions for the upload
  262. @type: list of str
  263. """
  264. return self.changes['Distribution'].split()
  265. @property
  266. def source(self):
  267. """included source or C{None}
  268. @type: L{daklib.upload.Source} or C{None}
  269. """
  270. if self._source is None:
  271. source_files = []
  272. for f in self.files.itervalues():
  273. if re_file_dsc.match(f.filename) or re_file_source.match(f.filename):
  274. source_files.append(f)
  275. if len(source_files) > 0:
  276. self._source = Source(self.directory, source_files, self._keyrings, self._require_signature)
  277. return self._source
  278. @property
  279. def sourceful(self):
  280. """C{True} if the upload includes source
  281. @type: bool
  282. """
  283. return "source" in self.architectures
  284. @property
  285. def source_name(self):
  286. """source package name
  287. @type: str
  288. """
  289. return re_field_source.match(self.changes['Source']).group('package')
  290. @property
  291. def binaries(self):
  292. """included binary packages
  293. @type: list of L{daklib.upload.Binary}
  294. """
  295. if self._binaries is None:
  296. binaries = []
  297. for f in self.files.itervalues():
  298. if re_file_binary.match(f.filename):
  299. binaries.append(Binary(self.directory, f))
  300. self._binaries = binaries
  301. return self._binaries
  302. @property
  303. def byhand_files(self):
  304. """included byhand files
  305. @type: list of L{daklib.upload.HashedFile}
  306. """
  307. byhand = []
  308. for f in self.files.itervalues():
  309. if re_file_dsc.match(f.filename) or re_file_source.match(f.filename) or re_file_binary.match(f.filename):
  310. continue
  311. if f.section != 'byhand' and f.section[:4] != 'raw-':
  312. raise InvalidChangesException("{0}: {1} looks like a byhand package, but is in section {2}".format(self.filename, f.filename, f.section))
  313. byhand.append(f)
  314. return byhand
  315. @property
  316. def binary_names(self):
  317. """names of included binary packages
  318. @type: list of str
  319. """
  320. return self.changes['Binary'].split()
  321. @property
  322. def closed_bugs(self):
  323. """bugs closed by this upload
  324. @type: list of str
  325. """
  326. return self.changes.get('Closes', '').split()
  327. @property
  328. def files(self):
  329. """dict mapping filenames to L{daklib.upload.HashedFile} objects
  330. @type: dict
  331. """
  332. if self._files is None:
  333. self._files = parse_file_list(self.changes, True)
  334. return self._files
  335. @property
  336. def bytes(self):
  337. """total size of files included in this upload in bytes
  338. @type: number
  339. """
  340. count = 0
  341. for f in self.files.itervalues():
  342. count += f.size
  343. return count
  344. def __cmp__(self, other):
  345. """compare two changes files
  346. We sort by source name and version first. If these are identical,
  347. we sort changes that include source before those without source (so
  348. that sourceful uploads get processed first), and finally fall back
  349. to the filename (this should really never happen).
  350. @rtype: number
  351. @return: n where n < 0 if self < other, n = 0 if self == other, n > 0 if self > other
  352. """
  353. ret = cmp(self.changes.get('Source'), other.changes.get('Source'))
  354. if ret == 0:
  355. # compare version
  356. ret = apt_pkg.version_compare(self.changes.get('Version', ''), other.changes.get('Version', ''))
  357. if ret == 0:
  358. # sort changes with source before changes without source
  359. if 'source' in self.architectures and 'source' not in other.architectures:
  360. ret = -1
  361. elif 'source' not in self.architectures and 'source' in other.architectures:
  362. ret = 1
  363. else:
  364. ret = 0
  365. if ret == 0:
  366. # fall back to filename
  367. ret = cmp(self.filename, other.filename)
  368. return ret
  369. class Binary(object):
  370. """Representation of a binary package
  371. """
  372. def __init__(self, directory, hashed_file):
  373. self.hashed_file = hashed_file
  374. """file object for the .deb
  375. @type: HashedFile
  376. """
  377. path = os.path.join(directory, hashed_file.input_filename)
  378. data = apt_inst.DebFile(path).control.extractdata("control")
  379. self.control = apt_pkg.TagSection(data)
  380. """dict to access fields in DEBIAN/control
  381. @type: dict-like
  382. """
  383. @classmethod
  384. def from_file(cls, directory, filename):
  385. hashed_file = HashedFile.from_file(directory, filename)
  386. return cls(directory, hashed_file)
  387. @property
  388. def source(self):
  389. """get tuple with source package name and version
  390. @type: tuple of str
  391. """
  392. source = self.control.get("Source", None)
  393. if source is None:
  394. return (self.control["Package"], self.control["Version"])
  395. match = re_field_source.match(source)
  396. if not match:
  397. raise InvalidBinaryException('{0}: Invalid Source field.'.format(self.hashed_file.filename))
  398. version = match.group('version')
  399. if version is None:
  400. version = self.control['Version']
  401. return (match.group('package'), version)
  402. @property
  403. def name(self):
  404. return self.control['Package']
  405. @property
  406. def type(self):
  407. """package type ('deb' or 'udeb')
  408. @type: str
  409. """
  410. match = re_file_binary.match(self.hashed_file.filename)
  411. if not match:
  412. raise InvalidBinaryException('{0}: Does not match re_file_binary'.format(self.hashed_file.filename))
  413. return match.group('type')
  414. @property
  415. def component(self):
  416. """component name
  417. @type: str
  418. """
  419. fields = self.control['Section'].split('/')
  420. if len(fields) > 1:
  421. return fields[0]
  422. return "main"
  423. class Source(object):
  424. """Representation of a source package
  425. """
  426. def __init__(self, directory, hashed_files, keyrings, require_signature=True):
  427. self.hashed_files = hashed_files
  428. """list of source files (including the .dsc itself)
  429. @type: list of L{HashedFile}
  430. """
  431. self._dsc_file = None
  432. for f in hashed_files:
  433. if re_file_dsc.match(f.filename):
  434. if self._dsc_file is not None:
  435. raise InvalidSourceException("Multiple .dsc found ({0} and {1})".format(self._dsc_file.filename, f.filename))
  436. else:
  437. self._dsc_file = f
  438. # make sure the hash for the dsc is valid before we use it
  439. self._dsc_file.check(directory)
  440. dsc_file_path = os.path.join(directory, self._dsc_file.input_filename)
  441. data = open(dsc_file_path, 'r').read()
  442. self._signed_file = SignedFile(data, keyrings, require_signature)
  443. self.dsc = apt_pkg.TagSection(self._signed_file.contents)
  444. """dict to access fields in the .dsc file
  445. @type: dict-like
  446. """
  447. self.package_list = daklib.packagelist.PackageList(self.dsc)
  448. """Information about packages built by the source.
  449. @type: daklib.packagelist.PackageList
  450. """
  451. self._files = None
  452. @classmethod
  453. def from_file(cls, directory, filename, keyrings, require_signature=True):
  454. hashed_file = HashedFile.from_file(directory, filename)
  455. return cls(directory, [hashed_file], keyrings, require_signature)
  456. @property
  457. def files(self):
  458. """dict mapping filenames to L{HashedFile} objects for additional source files
  459. This list does not include the .dsc itself.
  460. @type: dict
  461. """
  462. if self._files is None:
  463. self._files = parse_file_list(self.dsc, False)
  464. return self._files
  465. @property
  466. def primary_fingerprint(self):
  467. """fingerprint of the key used to sign the .dsc
  468. @type: str
  469. """
  470. return self._signed_file.primary_fingerprint
  471. @property
  472. def valid_signature(self):
  473. """C{True} if the .dsc has a valid signature
  474. @type: bool
  475. """
  476. return self._signed_file.valid
  477. @property
  478. def component(self):
  479. """guessed component name
  480. Might be wrong. Don't rely on this.
  481. @type: str
  482. """
  483. if 'Section' not in self.dsc:
  484. return 'main'
  485. fields = self.dsc['Section'].split('/')
  486. if len(fields) > 1:
  487. return fields[0]
  488. return "main"
  489. @property
  490. def filename(self):
  491. """filename of .dsc file
  492. @type: str
  493. """
  494. return self._dsc_file.filename