upload.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643
  1. # Copyright (C) 2012, Ansgar Burchardt <ansgar@debian.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 2 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License along
  14. # with this program; if not, write to the Free Software Foundation, Inc.,
  15. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  16. """module to handle uploads not yet installed to the archive
  17. This module provides classes to handle uploads not yet installed to the
  18. archive. Central is the L{Changes} class which represents a changes file.
  19. It provides methods to access the included binary and source packages.
  20. """
  21. import apt_inst
  22. import apt_pkg
  23. import errno
  24. import functools
  25. import os
  26. from daklib.aptversion import AptVersion
  27. from daklib.gpg import SignedFile
  28. from daklib.regexes import *
  29. import daklib.packagelist
  30. class UploadException(Exception):
  31. pass
  32. class InvalidChangesException(UploadException):
  33. pass
  34. class InvalidBinaryException(UploadException):
  35. pass
  36. class InvalidSourceException(UploadException):
  37. pass
  38. class InvalidHashException(UploadException):
  39. def __init__(self, filename, hash_name, expected, actual):
  40. self.filename = filename
  41. self.hash_name = hash_name
  42. self.expected = expected
  43. self.actual = actual
  44. def __str__(self):
  45. return ("Invalid {0} hash for {1}:\n"
  46. "According to the control file the {0} hash should be {2},\n"
  47. "but {1} has {3}.\n"
  48. "\n"
  49. "If you did not include {1} in your upload, a different version\n"
  50. "might already be known to the archive software.") \
  51. .format(self.hash_name, self.filename, self.expected, self.actual)
  52. class InvalidFilenameException(UploadException):
  53. def __init__(self, filename):
  54. self.filename = filename
  55. def __str__(self):
  56. return "Invalid filename '{0}'.".format(self.filename)
  57. class FileDoesNotExist(UploadException):
  58. def __init__(self, filename):
  59. self.filename = filename
  60. def __str__(self):
  61. return "Refers to non-existing file '{0}'".format(self.filename)
  62. class HashedFile(object):
  63. """file with checksums
  64. """
  65. def __init__(self, filename, size, md5sum, sha1sum, sha256sum, section=None, priority=None, input_filename=None):
  66. self.filename = filename
  67. """name of the file
  68. @type: str
  69. """
  70. if input_filename is None:
  71. input_filename = filename
  72. self.input_filename = input_filename
  73. """name of the file on disk
  74. Used for temporary files that should not be installed using their on-disk name.
  75. @type: str
  76. """
  77. self.size = size
  78. """size in bytes
  79. @type: long
  80. """
  81. self.md5sum = md5sum
  82. """MD5 hash in hexdigits
  83. @type: str
  84. """
  85. self.sha1sum = sha1sum
  86. """SHA1 hash in hexdigits
  87. @type: str
  88. """
  89. self.sha256sum = sha256sum
  90. """SHA256 hash in hexdigits
  91. @type: str
  92. """
  93. self.section = section
  94. """section or C{None}
  95. @type: str or C{None}
  96. """
  97. self.priority = priority
  98. """priority or C{None}
  99. @type: str of C{None}
  100. """
  101. @classmethod
  102. def from_file(cls, directory, filename, section=None, priority=None):
  103. """create with values for an existing file
  104. Create a C{HashedFile} object that refers to an already existing file.
  105. @type directory: str
  106. @param directory: directory the file is located in
  107. @type filename: str
  108. @param filename: filename
  109. @type section: str or C{None}
  110. @param section: optional section as given in .changes files
  111. @type priority: str or C{None}
  112. @param priority: optional priority as given in .changes files
  113. @rtype: L{HashedFile}
  114. @return: C{HashedFile} object for the given file
  115. """
  116. path = os.path.join(directory, filename)
  117. with open(path, 'r') as fh:
  118. size = os.fstat(fh.fileno()).st_size
  119. hashes = apt_pkg.Hashes(fh)
  120. return cls(filename, size, hashes.md5, hashes.sha1, hashes.sha256, section, priority)
  121. def check(self, directory):
  122. """Validate hashes
  123. Check if size and hashes match the expected value.
  124. @type directory: str
  125. @param directory: directory the file is located in
  126. @raise InvalidHashException: hash mismatch
  127. """
  128. path = os.path.join(directory, self.input_filename)
  129. try:
  130. with open(path) as fh:
  131. self.check_fh(fh)
  132. except IOError as e:
  133. if e.errno == errno.ENOENT:
  134. raise FileDoesNotExist(self.input_filename)
  135. raise
  136. def check_fh(self, fh):
  137. size = os.fstat(fh.fileno()).st_size
  138. fh.seek(0)
  139. hashes = apt_pkg.Hashes(fh)
  140. if size != self.size:
  141. raise InvalidHashException(self.filename, 'size', self.size, size)
  142. if hashes.md5 != self.md5sum:
  143. raise InvalidHashException(self.filename, 'md5sum', self.md5sum, hashes.md5)
  144. if hashes.sha1 != self.sha1sum:
  145. raise InvalidHashException(self.filename, 'sha1sum', self.sha1sum, hashes.sha1)
  146. if hashes.sha256 != self.sha256sum:
  147. raise InvalidHashException(self.filename, 'sha256sum', self.sha256sum, hashes.sha256)
  148. def parse_file_list(control, has_priority_and_section, safe_file_regexp=re_file_safe, fields=('Files', 'Checksums-Sha1', 'Checksums-Sha256')):
  149. """Parse Files and Checksums-* fields
  150. @type control: dict-like
  151. @param control: control file to take fields from
  152. @type has_priority_and_section: bool
  153. @param has_priority_and_section: Files field include section and priority
  154. (as in .changes)
  155. @raise InvalidChangesException: missing fields or other grave errors
  156. @rtype: dict
  157. @return: dict mapping filenames to L{daklib.upload.HashedFile} objects
  158. """
  159. entries = {}
  160. for line in control.get(fields[0], "").split('\n'):
  161. if len(line) == 0:
  162. continue
  163. if has_priority_and_section:
  164. (md5sum, size, section, priority, filename) = line.split()
  165. entry = dict(md5sum=md5sum, size=long(size), section=section, priority=priority, filename=filename)
  166. else:
  167. (md5sum, size, filename) = line.split()
  168. entry = dict(md5sum=md5sum, size=long(size), filename=filename)
  169. entries[filename] = entry
  170. for line in control.get(fields[1], "").split('\n'):
  171. if len(line) == 0:
  172. continue
  173. (sha1sum, size, filename) = line.split()
  174. entry = entries.get(filename, None)
  175. if entry is None:
  176. raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[1], fields[0]))
  177. if entry is not None and entry.get('size', None) != long(size):
  178. raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[1]))
  179. entry['sha1sum'] = sha1sum
  180. for line in control.get(fields[2], "").split('\n'):
  181. if len(line) == 0:
  182. continue
  183. (sha256sum, size, filename) = line.split()
  184. entry = entries.get(filename, None)
  185. if entry is None:
  186. raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[2], fields[0]))
  187. if entry is not None and entry.get('size', None) != long(size):
  188. raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[2]))
  189. entry['sha256sum'] = sha256sum
  190. files = {}
  191. for entry in entries.itervalues():
  192. filename = entry['filename']
  193. if 'size' not in entry:
  194. raise InvalidChangesException('No size for {0}.'.format(filename))
  195. if 'md5sum' not in entry:
  196. raise InvalidChangesException('No md5sum for {0}.'.format(filename))
  197. if 'sha1sum' not in entry:
  198. raise InvalidChangesException('No sha1sum for {0}.'.format(filename))
  199. if 'sha256sum' not in entry:
  200. raise InvalidChangesException('No sha256sum for {0}.'.format(filename))
  201. if safe_file_regexp is not None and not safe_file_regexp.match(filename):
  202. raise InvalidChangesException("{0}: References file with unsafe filename {1}.".format(self.filename, filename))
  203. f = files[filename] = HashedFile(**entry)
  204. return files
  205. @functools.total_ordering
  206. class Changes(object):
  207. """Representation of a .changes file
  208. """
  209. def __init__(self, directory, filename, keyrings, require_signature=True):
  210. if not re_file_safe.match(filename):
  211. raise InvalidChangesException('{0}: unsafe filename'.format(filename))
  212. self.directory = directory
  213. """directory the .changes is located in
  214. @type: str
  215. """
  216. self.filename = filename
  217. """name of the .changes file
  218. @type: str
  219. """
  220. data = open(self.path).read()
  221. self.signature = SignedFile(data, keyrings, require_signature)
  222. self.changes = apt_pkg.TagSection(self.signature.contents)
  223. """dict to access fields of the .changes file
  224. @type: dict-like
  225. """
  226. self._binaries = None
  227. self._source = None
  228. self._files = None
  229. self._keyrings = keyrings
  230. self._require_signature = require_signature
  231. @property
  232. def path(self):
  233. """path to the .changes file
  234. @type: str
  235. """
  236. return os.path.join(self.directory, self.filename)
  237. @property
  238. def primary_fingerprint(self):
  239. """fingerprint of the key used for signing the .changes file
  240. @type: str
  241. """
  242. return self.signature.primary_fingerprint
  243. @property
  244. def valid_signature(self):
  245. """C{True} if the .changes has a valid signature
  246. @type: bool
  247. """
  248. return self.signature.valid
  249. @property
  250. def weak_signature(self):
  251. """C{True} if the .changes was signed using a weak algorithm
  252. @type: bool
  253. """
  254. return self.signature.weak_signature
  255. @property
  256. def signature_timestamp(self):
  257. return self.signature.signature_timestamp
  258. @property
  259. def contents_sha1(self):
  260. return self.signature.contents_sha1
  261. @property
  262. def architectures(self):
  263. """list of architectures included in the upload
  264. @type: list of str
  265. """
  266. return self.changes.get('Architecture', '').split()
  267. @property
  268. def distributions(self):
  269. """list of target distributions for the upload
  270. @type: list of str
  271. """
  272. return self.changes['Distribution'].split()
  273. @property
  274. def source(self):
  275. """included source or C{None}
  276. @type: L{daklib.upload.Source} or C{None}
  277. """
  278. if self._source is None:
  279. source_files = []
  280. for f in self.files.itervalues():
  281. if re_file_dsc.match(f.filename) or re_file_source.match(f.filename):
  282. source_files.append(f)
  283. if len(source_files) > 0:
  284. self._source = Source(self.directory, source_files, self._keyrings, self._require_signature)
  285. return self._source
  286. @property
  287. def sourceful(self):
  288. """C{True} if the upload includes source
  289. @type: bool
  290. """
  291. return "source" in self.architectures
  292. @property
  293. def source_name(self):
  294. """source package name
  295. @type: str
  296. """
  297. return re_field_source.match(self.changes['Source']).group('package')
  298. @property
  299. def binaries(self):
  300. """included binary packages
  301. @type: list of L{daklib.upload.Binary}
  302. """
  303. if self._binaries is None:
  304. binaries = []
  305. for f in self.files.itervalues():
  306. if re_file_binary.match(f.filename):
  307. binaries.append(Binary(self.directory, f))
  308. self._binaries = binaries
  309. return self._binaries
  310. @property
  311. def byhand_files(self):
  312. """included byhand files
  313. @type: list of L{daklib.upload.HashedFile}
  314. """
  315. byhand = []
  316. for f in self.files.itervalues():
  317. if f.section == 'byhand' or f.section[:4] == 'raw-':
  318. byhand.append(f)
  319. continue
  320. if re_file_dsc.match(f.filename) or re_file_source.match(f.filename) or re_file_binary.match(f.filename):
  321. continue
  322. if re_file_buildinfo.match(f.filename):
  323. continue
  324. raise InvalidChangesException("{0}: {1} looks like a byhand package, but is in section {2}".format(self.filename, f.filename, f.section))
  325. return byhand
  326. @property
  327. def buildinfo_files(self):
  328. """included buildinfo files
  329. @type: list of L{daklib.upload.HashedFile}
  330. """
  331. buildinfo = []
  332. for f in self.files.itervalues():
  333. if re_file_buildinfo.match(f.filename):
  334. buildinfo.append(f)
  335. return buildinfo
  336. @property
  337. def binary_names(self):
  338. """names of included binary packages
  339. @type: list of str
  340. """
  341. return self.changes.get('Binary', '').split()
  342. @property
  343. def closed_bugs(self):
  344. """bugs closed by this upload
  345. @type: list of str
  346. """
  347. return self.changes.get('Closes', '').split()
  348. @property
  349. def files(self):
  350. """dict mapping filenames to L{daklib.upload.HashedFile} objects
  351. @type: dict
  352. """
  353. if self._files is None:
  354. self._files = parse_file_list(self.changes, True)
  355. return self._files
  356. @property
  357. def bytes(self):
  358. """total size of files included in this upload in bytes
  359. @type: number
  360. """
  361. count = 0
  362. for f in self.files.itervalues():
  363. count += f.size
  364. return count
  365. def _key(self):
  366. """tuple used to compare two changes files
  367. We sort by source name and version first. If these are identical,
  368. we sort changes that include source before those without source (so
  369. that sourceful uploads get processed first), and finally fall back
  370. to the filename (this should really never happen).
  371. @rtype: tuple
  372. """
  373. return (
  374. self.changes.get('Source'),
  375. AptVersion(self.changes.get('Version', '')),
  376. 'source' not in self.architectures,
  377. self.filename
  378. )
  379. def __eq__(self, other):
  380. return self._key() == other._key()
  381. def __lt__(self, other):
  382. return self._key() < other._key()
  383. class Binary(object):
  384. """Representation of a binary package
  385. """
  386. def __init__(self, directory, hashed_file):
  387. self.hashed_file = hashed_file
  388. """file object for the .deb
  389. @type: HashedFile
  390. """
  391. path = os.path.join(directory, hashed_file.input_filename)
  392. data = apt_inst.DebFile(path).control.extractdata("control")
  393. self.control = apt_pkg.TagSection(data)
  394. """dict to access fields in DEBIAN/control
  395. @type: dict-like
  396. """
  397. @classmethod
  398. def from_file(cls, directory, filename):
  399. hashed_file = HashedFile.from_file(directory, filename)
  400. return cls(directory, hashed_file)
  401. @property
  402. def source(self):
  403. """get tuple with source package name and version
  404. @type: tuple of str
  405. """
  406. source = self.control.get("Source", None)
  407. if source is None:
  408. return (self.control["Package"], self.control["Version"])
  409. match = re_field_source.match(source)
  410. if not match:
  411. raise InvalidBinaryException('{0}: Invalid Source field.'.format(self.hashed_file.filename))
  412. version = match.group('version')
  413. if version is None:
  414. version = self.control['Version']
  415. return (match.group('package'), version)
  416. @property
  417. def name(self):
  418. return self.control['Package']
  419. @property
  420. def type(self):
  421. """package type ('deb' or 'udeb')
  422. @type: str
  423. """
  424. match = re_file_binary.match(self.hashed_file.filename)
  425. if not match:
  426. raise InvalidBinaryException('{0}: Does not match re_file_binary'.format(self.hashed_file.filename))
  427. return match.group('type')
  428. @property
  429. def component(self):
  430. """component name
  431. @type: str
  432. """
  433. fields = self.control['Section'].split('/')
  434. if len(fields) > 1:
  435. return fields[0]
  436. return "main"
  437. class Source(object):
  438. """Representation of a source package
  439. """
  440. def __init__(self, directory, hashed_files, keyrings, require_signature=True):
  441. self.hashed_files = hashed_files
  442. """list of source files (including the .dsc itself)
  443. @type: list of L{HashedFile}
  444. """
  445. self._dsc_file = None
  446. for f in hashed_files:
  447. if re_file_dsc.match(f.filename):
  448. if self._dsc_file is not None:
  449. raise InvalidSourceException("Multiple .dsc found ({0} and {1})".format(self._dsc_file.filename, f.filename))
  450. else:
  451. self._dsc_file = f
  452. if self._dsc_file is None:
  453. raise InvalidSourceException("No .dsc included in source files")
  454. # make sure the hash for the dsc is valid before we use it
  455. self._dsc_file.check(directory)
  456. dsc_file_path = os.path.join(directory, self._dsc_file.input_filename)
  457. data = open(dsc_file_path, 'r').read()
  458. self.signature = SignedFile(data, keyrings, require_signature)
  459. self.dsc = apt_pkg.TagSection(self.signature.contents)
  460. """dict to access fields in the .dsc file
  461. @type: dict-like
  462. """
  463. self.package_list = daklib.packagelist.PackageList(self.dsc)
  464. """Information about packages built by the source.
  465. @type: daklib.packagelist.PackageList
  466. """
  467. self._files = None
  468. @classmethod
  469. def from_file(cls, directory, filename, keyrings, require_signature=True):
  470. hashed_file = HashedFile.from_file(directory, filename)
  471. return cls(directory, [hashed_file], keyrings, require_signature)
  472. @property
  473. def files(self):
  474. """dict mapping filenames to L{HashedFile} objects for additional source files
  475. This list does not include the .dsc itself.
  476. @type: dict
  477. """
  478. if self._files is None:
  479. self._files = parse_file_list(self.dsc, False)
  480. return self._files
  481. @property
  482. def primary_fingerprint(self):
  483. """fingerprint of the key used to sign the .dsc
  484. @type: str
  485. """
  486. return self.signature.primary_fingerprint
  487. @property
  488. def valid_signature(self):
  489. """C{True} if the .dsc has a valid signature
  490. @type: bool
  491. """
  492. return self.signature.valid
  493. @property
  494. def weak_signature(self):
  495. """C{True} if the .dsc was signed using a weak algorithm
  496. @type: bool
  497. """
  498. return self.signature.weak_signature
  499. @property
  500. def component(self):
  501. """guessed component name
  502. Might be wrong. Don't rely on this.
  503. @type: str
  504. """
  505. if 'Section' not in self.dsc:
  506. return 'main'
  507. fields = self.dsc['Section'].split('/')
  508. if len(fields) > 1:
  509. return fields[0]
  510. return "main"
  511. @property
  512. def filename(self):
  513. """filename of .dsc file
  514. @type: str
  515. """
  516. return self._dsc_file.filename