upload.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647
  1. # Copyright (C) 2012, Ansgar Burchardt <ansgar@debian.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 2 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License along
  14. # with this program; if not, write to the Free Software Foundation, Inc.,
  15. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  16. """module to handle uploads not yet installed to the archive
  17. This module provides classes to handle uploads not yet installed to the
  18. archive. Central is the L{Changes} class which represents a changes file.
  19. It provides methods to access the included binary and source packages.
  20. """
  21. import apt_inst
  22. import apt_pkg
  23. import errno
  24. import functools
  25. import os
  26. import six
  27. from daklib.aptversion import AptVersion
  28. from daklib.gpg import SignedFile
  29. from daklib.regexes import *
  30. import daklib.dakapt
  31. import daklib.packagelist
  32. class UploadException(Exception):
  33. pass
  34. class InvalidChangesException(UploadException):
  35. pass
  36. class InvalidBinaryException(UploadException):
  37. pass
  38. class InvalidSourceException(UploadException):
  39. pass
  40. class InvalidHashException(UploadException):
  41. def __init__(self, filename, hash_name, expected, actual):
  42. self.filename = filename
  43. self.hash_name = hash_name
  44. self.expected = expected
  45. self.actual = actual
  46. def __str__(self):
  47. return ("Invalid {0} hash for {1}:\n"
  48. "According to the control file the {0} hash should be {2},\n"
  49. "but {1} has {3}.\n"
  50. "\n"
  51. "If you did not include {1} in your upload, a different version\n"
  52. "might already be known to the archive software.") \
  53. .format(self.hash_name, self.filename, self.expected, self.actual)
  54. class InvalidFilenameException(UploadException):
  55. def __init__(self, filename):
  56. self.filename = filename
  57. def __str__(self):
  58. return "Invalid filename '{0}'.".format(self.filename)
  59. class FileDoesNotExist(UploadException):
  60. def __init__(self, filename):
  61. self.filename = filename
  62. def __str__(self):
  63. return "Refers to non-existing file '{0}'".format(self.filename)
  64. class HashedFile(object):
  65. """file with checksums
  66. """
  67. def __init__(self, filename, size, md5sum, sha1sum, sha256sum, section=None, priority=None, input_filename=None):
  68. self.filename = filename
  69. """name of the file
  70. @type: str
  71. """
  72. if input_filename is None:
  73. input_filename = filename
  74. self.input_filename = input_filename
  75. """name of the file on disk
  76. Used for temporary files that should not be installed using their on-disk name.
  77. @type: str
  78. """
  79. self.size = size
  80. """size in bytes
  81. @type: long
  82. """
  83. self.md5sum = md5sum
  84. """MD5 hash in hexdigits
  85. @type: str
  86. """
  87. self.sha1sum = sha1sum
  88. """SHA1 hash in hexdigits
  89. @type: str
  90. """
  91. self.sha256sum = sha256sum
  92. """SHA256 hash in hexdigits
  93. @type: str
  94. """
  95. self.section = section
  96. """section or C{None}
  97. @type: str or C{None}
  98. """
  99. self.priority = priority
  100. """priority or C{None}
  101. @type: str of C{None}
  102. """
  103. @classmethod
  104. def from_file(cls, directory, filename, section=None, priority=None):
  105. """create with values for an existing file
  106. Create a C{HashedFile} object that refers to an already existing file.
  107. @type directory: str
  108. @param directory: directory the file is located in
  109. @type filename: str
  110. @param filename: filename
  111. @type section: str or C{None}
  112. @param section: optional section as given in .changes files
  113. @type priority: str or C{None}
  114. @param priority: optional priority as given in .changes files
  115. @rtype: L{HashedFile}
  116. @return: C{HashedFile} object for the given file
  117. """
  118. path = os.path.join(directory, filename)
  119. with open(path, 'r') as fh:
  120. size = os.fstat(fh.fileno()).st_size
  121. hashes = daklib.dakapt.DakHashes(fh)
  122. return cls(filename, size, hashes.md5, hashes.sha1, hashes.sha256, section, priority)
  123. def check(self, directory):
  124. """Validate hashes
  125. Check if size and hashes match the expected value.
  126. @type directory: str
  127. @param directory: directory the file is located in
  128. @raise InvalidHashException: hash mismatch
  129. """
  130. path = os.path.join(directory, self.input_filename)
  131. try:
  132. with open(path) as fh:
  133. self.check_fh(fh)
  134. except IOError as e:
  135. if e.errno == errno.ENOENT:
  136. raise FileDoesNotExist(self.input_filename)
  137. raise
  138. def check_fh(self, fh):
  139. size = os.fstat(fh.fileno()).st_size
  140. fh.seek(0)
  141. hashes = daklib.dakapt.DakHashes(fh)
  142. if size != self.size:
  143. raise InvalidHashException(self.filename, 'size', self.size, size)
  144. if hashes.md5 != self.md5sum:
  145. raise InvalidHashException(self.filename, 'md5sum', self.md5sum, hashes.md5)
  146. if hashes.sha1 != self.sha1sum:
  147. raise InvalidHashException(self.filename, 'sha1sum', self.sha1sum, hashes.sha1)
  148. if hashes.sha256 != self.sha256sum:
  149. raise InvalidHashException(self.filename, 'sha256sum', self.sha256sum, hashes.sha256)
  150. def parse_file_list(control, has_priority_and_section, safe_file_regexp=re_file_safe, fields=('Files', 'Checksums-Sha1', 'Checksums-Sha256')):
  151. """Parse Files and Checksums-* fields
  152. @type control: dict-like
  153. @param control: control file to take fields from
  154. @type has_priority_and_section: bool
  155. @param has_priority_and_section: Files field include section and priority
  156. (as in .changes)
  157. @raise InvalidChangesException: missing fields or other grave errors
  158. @rtype: dict
  159. @return: dict mapping filenames to L{daklib.upload.HashedFile} objects
  160. """
  161. entries = {}
  162. for line in control.get(fields[0], "").split('\n'):
  163. if len(line) == 0:
  164. continue
  165. if has_priority_and_section:
  166. (md5sum, size, section, priority, filename) = line.split()
  167. entry = dict(md5sum=md5sum, size=int(size), section=section, priority=priority, filename=filename)
  168. else:
  169. (md5sum, size, filename) = line.split()
  170. entry = dict(md5sum=md5sum, size=int(size), filename=filename)
  171. entries[filename] = entry
  172. for line in control.get(fields[1], "").split('\n'):
  173. if len(line) == 0:
  174. continue
  175. (sha1sum, size, filename) = line.split()
  176. entry = entries.get(filename, None)
  177. if entry is None:
  178. raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[1], fields[0]))
  179. if entry is not None and entry.get('size', None) != int(size):
  180. raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[1]))
  181. entry['sha1sum'] = sha1sum
  182. for line in control.get(fields[2], "").split('\n'):
  183. if len(line) == 0:
  184. continue
  185. (sha256sum, size, filename) = line.split()
  186. entry = entries.get(filename, None)
  187. if entry is None:
  188. raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[2], fields[0]))
  189. if entry is not None and entry.get('size', None) != int(size):
  190. raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[2]))
  191. entry['sha256sum'] = sha256sum
  192. files = {}
  193. for entry in six.itervalues(entries):
  194. filename = entry['filename']
  195. if 'size' not in entry:
  196. raise InvalidChangesException('No size for {0}.'.format(filename))
  197. if 'md5sum' not in entry:
  198. raise InvalidChangesException('No md5sum for {0}.'.format(filename))
  199. if 'sha1sum' not in entry:
  200. raise InvalidChangesException('No sha1sum for {0}.'.format(filename))
  201. if 'sha256sum' not in entry:
  202. raise InvalidChangesException('No sha256sum for {0}.'.format(filename))
  203. if safe_file_regexp is not None and not safe_file_regexp.match(filename):
  204. raise InvalidChangesException("{0}: References file with unsafe filename {1}.".format(self.filename, filename))
  205. f = files[filename] = HashedFile(**entry)
  206. return files
  207. @functools.total_ordering
  208. class Changes(object):
  209. """Representation of a .changes file
  210. """
  211. def __init__(self, directory, filename, keyrings, require_signature=True):
  212. if not re_file_safe.match(filename):
  213. raise InvalidChangesException('{0}: unsafe filename'.format(filename))
  214. self.directory = directory
  215. """directory the .changes is located in
  216. @type: str
  217. """
  218. self.filename = filename
  219. """name of the .changes file
  220. @type: str
  221. """
  222. with open(self.path, 'rb') as fd:
  223. data = fd.read()
  224. self.signature = SignedFile(data, keyrings, require_signature)
  225. self.changes = apt_pkg.TagSection(self.signature.contents)
  226. """dict to access fields of the .changes file
  227. @type: dict-like
  228. """
  229. self._binaries = None
  230. self._source = None
  231. self._files = None
  232. self._keyrings = keyrings
  233. self._require_signature = require_signature
  234. @property
  235. def path(self):
  236. """path to the .changes file
  237. @type: str
  238. """
  239. return os.path.join(self.directory, self.filename)
  240. @property
  241. def primary_fingerprint(self):
  242. """fingerprint of the key used for signing the .changes file
  243. @type: str
  244. """
  245. return self.signature.primary_fingerprint
  246. @property
  247. def valid_signature(self):
  248. """C{True} if the .changes has a valid signature
  249. @type: bool
  250. """
  251. return self.signature.valid
  252. @property
  253. def weak_signature(self):
  254. """C{True} if the .changes was signed using a weak algorithm
  255. @type: bool
  256. """
  257. return self.signature.weak_signature
  258. @property
  259. def signature_timestamp(self):
  260. return self.signature.signature_timestamp
  261. @property
  262. def contents_sha1(self):
  263. return self.signature.contents_sha1
  264. @property
  265. def architectures(self):
  266. """list of architectures included in the upload
  267. @type: list of str
  268. """
  269. return self.changes.get('Architecture', '').split()
  270. @property
  271. def distributions(self):
  272. """list of target distributions for the upload
  273. @type: list of str
  274. """
  275. return self.changes['Distribution'].split()
  276. @property
  277. def source(self):
  278. """included source or C{None}
  279. @type: L{daklib.upload.Source} or C{None}
  280. """
  281. if self._source is None:
  282. source_files = []
  283. for f in six.itervalues(self.files):
  284. if re_file_dsc.match(f.filename) or re_file_source.match(f.filename):
  285. source_files.append(f)
  286. if len(source_files) > 0:
  287. self._source = Source(self.directory, source_files, self._keyrings, self._require_signature)
  288. return self._source
  289. @property
  290. def sourceful(self):
  291. """C{True} if the upload includes source
  292. @type: bool
  293. """
  294. return "source" in self.architectures
  295. @property
  296. def source_name(self):
  297. """source package name
  298. @type: str
  299. """
  300. return re_field_source.match(self.changes['Source']).group('package')
  301. @property
  302. def binaries(self):
  303. """included binary packages
  304. @type: list of L{daklib.upload.Binary}
  305. """
  306. if self._binaries is None:
  307. binaries = []
  308. for f in six.itervalues(self.files):
  309. if re_file_binary.match(f.filename):
  310. binaries.append(Binary(self.directory, f))
  311. self._binaries = binaries
  312. return self._binaries
  313. @property
  314. def byhand_files(self):
  315. """included byhand files
  316. @type: list of L{daklib.upload.HashedFile}
  317. """
  318. byhand = []
  319. for f in six.itervalues(self.files):
  320. if f.section == 'byhand' or f.section[:4] == 'raw-':
  321. byhand.append(f)
  322. continue
  323. if re_file_dsc.match(f.filename) or re_file_source.match(f.filename) or re_file_binary.match(f.filename):
  324. continue
  325. if re_file_buildinfo.match(f.filename):
  326. continue
  327. raise InvalidChangesException("{0}: {1} looks like a byhand package, but is in section {2}".format(self.filename, f.filename, f.section))
  328. return byhand
  329. @property
  330. def buildinfo_files(self):
  331. """included buildinfo files
  332. @type: list of L{daklib.upload.HashedFile}
  333. """
  334. buildinfo = []
  335. for f in six.itervalues(self.files):
  336. if re_file_buildinfo.match(f.filename):
  337. buildinfo.append(f)
  338. return buildinfo
  339. @property
  340. def binary_names(self):
  341. """names of included binary packages
  342. @type: list of str
  343. """
  344. return self.changes.get('Binary', '').split()
  345. @property
  346. def closed_bugs(self):
  347. """bugs closed by this upload
  348. @type: list of str
  349. """
  350. return self.changes.get('Closes', '').split()
  351. @property
  352. def files(self):
  353. """dict mapping filenames to L{daklib.upload.HashedFile} objects
  354. @type: dict
  355. """
  356. if self._files is None:
  357. self._files = parse_file_list(self.changes, True)
  358. return self._files
  359. @property
  360. def bytes(self):
  361. """total size of files included in this upload in bytes
  362. @type: number
  363. """
  364. count = 0
  365. for f in six.itervalues(self.files):
  366. count += f.size
  367. return count
  368. def _key(self):
  369. """tuple used to compare two changes files
  370. We sort by source name and version first. If these are identical,
  371. we sort changes that include source before those without source (so
  372. that sourceful uploads get processed first), and finally fall back
  373. to the filename (this should really never happen).
  374. @rtype: tuple
  375. """
  376. return (
  377. self.changes.get('Source'),
  378. AptVersion(self.changes.get('Version', '')),
  379. not self.sourceful,
  380. self.filename
  381. )
  382. def __eq__(self, other):
  383. return self._key() == other._key()
  384. def __lt__(self, other):
  385. return self._key() < other._key()
  386. class Binary(object):
  387. """Representation of a binary package
  388. """
  389. def __init__(self, directory, hashed_file):
  390. self.hashed_file = hashed_file
  391. """file object for the .deb
  392. @type: HashedFile
  393. """
  394. path = os.path.join(directory, hashed_file.input_filename)
  395. data = apt_inst.DebFile(path).control.extractdata("control")
  396. self.control = apt_pkg.TagSection(data)
  397. """dict to access fields in DEBIAN/control
  398. @type: dict-like
  399. """
  400. @classmethod
  401. def from_file(cls, directory, filename):
  402. hashed_file = HashedFile.from_file(directory, filename)
  403. return cls(directory, hashed_file)
  404. @property
  405. def source(self):
  406. """get tuple with source package name and version
  407. @type: tuple of str
  408. """
  409. source = self.control.get("Source", None)
  410. if source is None:
  411. return (self.control["Package"], self.control["Version"])
  412. match = re_field_source.match(source)
  413. if not match:
  414. raise InvalidBinaryException('{0}: Invalid Source field.'.format(self.hashed_file.filename))
  415. version = match.group('version')
  416. if version is None:
  417. version = self.control['Version']
  418. return (match.group('package'), version)
  419. @property
  420. def name(self):
  421. return self.control['Package']
  422. @property
  423. def type(self):
  424. """package type ('deb' or 'udeb')
  425. @type: str
  426. """
  427. match = re_file_binary.match(self.hashed_file.filename)
  428. if not match:
  429. raise InvalidBinaryException('{0}: Does not match re_file_binary'.format(self.hashed_file.filename))
  430. return match.group('type')
  431. @property
  432. def component(self):
  433. """component name
  434. @type: str
  435. """
  436. fields = self.control['Section'].split('/')
  437. if len(fields) > 1:
  438. return fields[0]
  439. return "main"
  440. class Source(object):
  441. """Representation of a source package
  442. """
  443. def __init__(self, directory, hashed_files, keyrings, require_signature=True):
  444. self.hashed_files = hashed_files
  445. """list of source files (including the .dsc itself)
  446. @type: list of L{HashedFile}
  447. """
  448. self._dsc_file = None
  449. for f in hashed_files:
  450. if re_file_dsc.match(f.filename):
  451. if self._dsc_file is not None:
  452. raise InvalidSourceException("Multiple .dsc found ({0} and {1})".format(self._dsc_file.filename, f.filename))
  453. else:
  454. self._dsc_file = f
  455. if self._dsc_file is None:
  456. raise InvalidSourceException("No .dsc included in source files")
  457. # make sure the hash for the dsc is valid before we use it
  458. self._dsc_file.check(directory)
  459. dsc_file_path = os.path.join(directory, self._dsc_file.input_filename)
  460. with open(dsc_file_path, 'rb') as fd:
  461. data = fd.read()
  462. self.signature = SignedFile(data, keyrings, require_signature)
  463. self.dsc = apt_pkg.TagSection(self.signature.contents)
  464. """dict to access fields in the .dsc file
  465. @type: dict-like
  466. """
  467. self.package_list = daklib.packagelist.PackageList(self.dsc)
  468. """Information about packages built by the source.
  469. @type: daklib.packagelist.PackageList
  470. """
  471. self._files = None
  472. @classmethod
  473. def from_file(cls, directory, filename, keyrings, require_signature=True):
  474. hashed_file = HashedFile.from_file(directory, filename)
  475. return cls(directory, [hashed_file], keyrings, require_signature)
  476. @property
  477. def files(self):
  478. """dict mapping filenames to L{HashedFile} objects for additional source files
  479. This list does not include the .dsc itself.
  480. @type: dict
  481. """
  482. if self._files is None:
  483. self._files = parse_file_list(self.dsc, False)
  484. return self._files
  485. @property
  486. def primary_fingerprint(self):
  487. """fingerprint of the key used to sign the .dsc
  488. @type: str
  489. """
  490. return self.signature.primary_fingerprint
  491. @property
  492. def valid_signature(self):
  493. """C{True} if the .dsc has a valid signature
  494. @type: bool
  495. """
  496. return self.signature.valid
  497. @property
  498. def weak_signature(self):
  499. """C{True} if the .dsc was signed using a weak algorithm
  500. @type: bool
  501. """
  502. return self.signature.weak_signature
  503. @property
  504. def component(self):
  505. """guessed component name
  506. Might be wrong. Don't rely on this.
  507. @type: str
  508. """
  509. if 'Section' not in self.dsc:
  510. return 'main'
  511. fields = self.dsc['Section'].split('/')
  512. if len(fields) > 1:
  513. return fields[0]
  514. return "main"
  515. @property
  516. def filename(self):
  517. """filename of .dsc file
  518. @type: str
  519. """
  520. return self._dsc_file.filename