upload.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646
  1. # Copyright (C) 2012, Ansgar Burchardt <ansgar@debian.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 2 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License along
  14. # with this program; if not, write to the Free Software Foundation, Inc.,
  15. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  16. """module to handle uploads not yet installed to the archive
  17. This module provides classes to handle uploads not yet installed to the
  18. archive. Central is the L{Changes} class which represents a changes file.
  19. It provides methods to access the included binary and source packages.
  20. """
  21. import apt_inst
  22. import apt_pkg
  23. import errno
  24. import functools
  25. import os
  26. from daklib.aptversion import AptVersion
  27. from daklib.gpg import SignedFile
  28. from daklib.regexes import *
  29. import daklib.dakapt
  30. import daklib.packagelist
  class UploadException(Exception):
      """Common base class of the upload-related exceptions in this module
      """
  class InvalidChangesException(UploadException):
      """Error in a .changes file or in its Files/Checksums-* fields
      """
  class InvalidBinaryException(UploadException):
      """Error in a binary package
      """
  class InvalidSourceException(UploadException):
      """Error in a source package
      """
  class InvalidHashException(UploadException):
      """Size or checksum of a file differs from the expected value

      @ivar filename: name of the file that was checked
      @ivar hash_name: name of the mismatching property (e.g. C{'md5sum'})
      @ivar expected: value given in the control file
      @ivar actual: value found for the file
      """

      def __init__(self, filename, hash_name, expected, actual):
          self.filename, self.hash_name = filename, hash_name
          self.expected, self.actual = expected, actual

      def __str__(self):
          # Placeholders: {0} hash name, {1} filename, {2} expected, {3} actual.
          template = (
              "Invalid {0} hash for {1}:\n"
              "According to the control file the {0} hash should be {2},\n"
              "but {1} has {3}.\n"
              "\n"
              "If you did not include {1} in your upload, a different version\n"
              "might already be known to the archive software."
          )
          return template.format(self.hash_name, self.filename, self.expected, self.actual)
  class InvalidFilenameException(UploadException):
      """Error for a filename that is considered invalid

      @ivar filename: the offending filename
      """

      def __init__(self, filename):
          self.filename = filename

      def __str__(self):
          template = "Invalid filename '{0}'."
          return template.format(self.filename)
  class FileDoesNotExist(UploadException):
      """A referenced file does not exist

      @ivar filename: name of the missing file
      """

      def __init__(self, filename):
          self.filename = filename

      def __str__(self):
          template = "Refers to non-existing file '{0}'"
          return template.format(self.filename)
  class HashedFile:
      """file with checksums

      @type filename: str
      @ivar filename: name of the file

      @type input_filename: str
      @ivar input_filename: name of the file on disk.
          Used for temporary files that should not be installed using their
          on-disk name.  Defaults to C{filename}.

      @type size: long
      @ivar size: size in bytes

      @type md5sum: str
      @ivar md5sum: MD5 hash in hexdigits

      @type sha1sum: str
      @ivar sha1sum: SHA1 hash in hexdigits

      @type sha256sum: str
      @ivar sha256sum: SHA256 hash in hexdigits

      @type section: str or C{None}
      @ivar section: section or C{None}

      @type priority: str or C{None}
      @ivar priority: priority or C{None}
      """

      def __init__(self, filename, size, md5sum, sha1sum, sha256sum, section=None, priority=None, input_filename=None):
          self.filename = filename
          # Without an explicit on-disk name the file is stored under its own name.
          self.input_filename = filename if input_filename is None else input_filename
          self.size = size
          self.md5sum = md5sum
          self.sha1sum = sha1sum
          self.sha256sum = sha256sum
          self.section = section
          self.priority = priority

      @classmethod
      def from_file(cls, directory, filename, section=None, priority=None):
          """create with values for an existing file

          Create a C{HashedFile} object that refers to an already existing file.

          @type  directory: str
          @param directory: directory the file is located in

          @type  filename: str
          @param filename: filename

          @type  section: str or C{None}
          @param section: optional section as given in .changes files

          @type  priority: str or C{None}
          @param priority: optional priority as given in .changes files

          @rtype:  L{HashedFile}
          @return: C{HashedFile} object for the given file
          """
          path = os.path.join(directory, filename)
          # Binary mode: the content is only hashed, never decoded as text.
          with open(path, 'rb') as fh:
              size = os.fstat(fh.fileno()).st_size
              hashes = daklib.dakapt.DakHashes(fh)
          return cls(filename, size, hashes.md5, hashes.sha1, hashes.sha256, section, priority)

      def check(self, directory):
          """Validate hashes

          Check if size and hashes match the expected value.

          @type  directory: str
          @param directory: directory the file is located in

          @raise InvalidHashException: hash mismatch
          @raise FileDoesNotExist: the file is missing in C{directory}
          """
          path = os.path.join(directory, self.input_filename)
          try:
              with open(path, 'rb') as fh:
                  self.check_fh(fh)
          except FileNotFoundError as e:
              # Other OSErrors (permissions, I/O errors, ...) propagate unchanged.
              raise FileDoesNotExist(self.input_filename) from e

      def check_fh(self, fh):
          """Validate size and hashes of an already opened file

          @param fh: open file object; it is rewound to the start before hashing

          @raise InvalidHashException: size or hash mismatch
          """
          actual_size = os.fstat(fh.fileno()).st_size
          fh.seek(0)
          hashes = daklib.dakapt.DakHashes(fh)

          # Checked in this order; the first mismatch is reported.
          comparisons = (
              ('size', self.size, actual_size),
              ('md5sum', self.md5sum, hashes.md5),
              ('sha1sum', self.sha1sum, hashes.sha1),
              ('sha256sum', self.sha256sum, hashes.sha256),
          )
          for hash_name, expected, actual in comparisons:
              if actual != expected:
                  raise InvalidHashException(self.filename, hash_name, expected, actual)
  def parse_file_list(control, has_priority_and_section, safe_file_regexp=re_file_safe, fields=('Files', 'Checksums-Sha1', 'Checksums-Sha256')):
      """Parse Files and Checksums-* fields

      @type  control: dict-like
      @param control: control file to take fields from

      @type  has_priority_and_section: bool
      @param has_priority_and_section: Files field include section and priority
                                       (as in .changes)

      @param safe_file_regexp: compiled pattern every filename has to match,
                               or C{None} to skip the filename check

      @type  fields: sequence of three str
      @param fields: names of the field carrying the MD5 sums (and, if
                     C{has_priority_and_section}, section and priority), the
                     SHA1 sums and the SHA256 sums, in this order

      @raise InvalidChangesException: missing fields or other grave errors
                                      (malformed lines, non-numeric or
                                      differing sizes, files missing from one
                                      of the fields, unsafe filenames)

      @rtype:  dict
      @return: dict mapping filenames to L{daklib.upload.HashedFile} objects
      """
      files_field = fields[0]

      def split_line(line, field, expected_columns):
          # Turn a malformed line into the documented exception instead of a
          # bare ValueError from tuple unpacking.
          columns = line.split()
          if len(columns) != expected_columns:
              raise InvalidChangesException('Malformed line in {0}: {1!r}'.format(field, line))
          return columns

      def parse_size(text, field):
          try:
              return int(text)
          except ValueError:
              raise InvalidChangesException('Invalid size {0!r} in {1}.'.format(text, field))

      entries = {}

      for line in control.get(files_field, "").split('\n'):
          if not line:
              continue
          if has_priority_and_section:
              (md5sum, size, section, priority, filename) = split_line(line, files_field, 5)
              entry = dict(md5sum=md5sum, size=parse_size(size, files_field), section=section, priority=priority, filename=filename)
          else:
              (md5sum, size, filename) = split_line(line, files_field, 3)
              entry = dict(md5sum=md5sum, size=parse_size(size, files_field), filename=filename)
          entries[filename] = entry

      # Both Checksums-* fields share the "<hash> <size> <filename>" layout.
      for field, key in ((fields[1], 'sha1sum'), (fields[2], 'sha256sum')):
          for line in control.get(field, "").split('\n'):
              if not line:
                  continue
              (checksum, size, filename) = split_line(line, field, 3)
              entry = entries.get(filename)
              if entry is None:
                  raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, field, files_field))
              if entry['size'] != parse_size(size, field):
                  raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, files_field, field))
              entry[key] = checksum

      files = {}
      for filename, entry in entries.items():
          for key in ('size', 'md5sum', 'sha1sum', 'sha256sum'):
              if key not in entry:
                  raise InvalidChangesException('No {0} for {1}.'.format(key, filename))
          if safe_file_regexp is not None and not safe_file_regexp.match(filename):
              # No "self" exists in this free function; the name of the control
              # file is not known here, so only the offending filename is given.
              raise InvalidChangesException("References file with unsafe filename {0}.".format(filename))
          files[filename] = HashedFile(**entry)

      return files
  @functools.total_ordering
  class Changes:
      """Representation of a .changes file

      @type directory: str
      @ivar directory: directory the .changes is located in

      @type filename: str
      @ivar filename: name of the .changes file

      @ivar signature: L{SignedFile} created from the raw content of the file

      @type changes: dict-like
      @ivar changes: dict to access fields of the .changes file
      """

      def __init__(self, directory, filename, keyrings, require_signature=True):
          """Read and parse a .changes file

          @type  directory: str
          @param directory: directory the .changes is located in

          @type  filename: str
          @param filename: name of the .changes file

          @param keyrings: passed on to L{SignedFile}

          @param require_signature: passed on to L{SignedFile}

          @raise InvalidChangesException: C{filename} does not match C{re_file_safe}
          """
          if not re_file_safe.match(filename):
              raise InvalidChangesException('{0}: unsafe filename'.format(filename))

          self.directory = directory
          self.filename = filename

          with open(self.path, 'rb') as fd:
              data = fd.read()
          self.signature = SignedFile(data, keyrings, require_signature)
          self.changes = apt_pkg.TagSection(self.signature.contents)

          # Lazily filled caches for the corresponding properties.
          self._binaries = None
          self._source = None
          self._files = None
          self._keyrings = keyrings
          self._require_signature = require_signature

      @property
      def path(self):
          """path to the .changes file
          @type: str
          """
          return os.path.join(self.directory, self.filename)

      @property
      def primary_fingerprint(self):
          """fingerprint of the key used for signing the .changes file
          @type: str
          """
          return self.signature.primary_fingerprint

      @property
      def valid_signature(self):
          """C{True} if the .changes has a valid signature
          @type: bool
          """
          return self.signature.valid

      @property
      def weak_signature(self):
          """C{True} if the .changes was signed using a weak algorithm
          @type: bool
          """
          return self.signature.weak_signature

      @property
      def signature_timestamp(self):
          """C{signature_timestamp} attribute of the underlying L{SignedFile}
          """
          return self.signature.signature_timestamp

      @property
      def contents_sha1(self):
          """C{contents_sha1} attribute of the underlying L{SignedFile}
          """
          return self.signature.contents_sha1

      @property
      def architectures(self):
          """list of architectures included in the upload
          @type: list of str
          """
          return self.changes.get('Architecture', '').split()

      @property
      def distributions(self):
          """list of target distributions for the upload
          @type: list of str
          """
          return self.changes['Distribution'].split()

      @property
      def source(self):
          """included source or C{None}
          @type: L{daklib.upload.Source} or C{None}
          """
          if self._source is None:
              source_files = [
                  f for f in self.files.values()
                  if re_file_dsc.match(f.filename) or re_file_source.match(f.filename)
              ]
              if source_files:
                  self._source = Source(self.directory, source_files, self._keyrings, self._require_signature)
          return self._source

      @property
      def sourceful(self):
          """C{True} if the upload includes source
          @type: bool
          """
          return "source" in self.architectures

      @property
      def source_name(self):
          """source package name
          @type: str
          @raise InvalidChangesException: Source field does not match C{re_field_source}
          """
          match = re_field_source.match(self.changes['Source'])
          if match is None:
              raise InvalidChangesException('{0}: Invalid Source field.'.format(self.filename))
          return match.group('package')

      @property
      def binaries(self):
          """included binary packages
          @type: list of L{daklib.upload.Binary}
          """
          if self._binaries is None:
              self._binaries = [
                  Binary(self.directory, f)
                  for f in self.files.values()
                  if re_file_binary.match(f.filename)
              ]
          return self._binaries

      @property
      def byhand_files(self):
          """included byhand files
          @type: list of L{daklib.upload.HashedFile}
          @raise InvalidChangesException: a file is neither source, binary nor
              buildinfo, but is not in a byhand or raw-* section either
          """
          byhand = []

          for f in self.files.values():
              if f.section == 'byhand' or f.section.startswith('raw-'):
                  byhand.append(f)
                  continue
              if re_file_dsc.match(f.filename) or re_file_source.match(f.filename) or re_file_binary.match(f.filename):
                  continue
              if re_file_buildinfo.match(f.filename):
                  continue

              # Unknown kind of file outside the byhand/raw-* sections.
              raise InvalidChangesException("{0}: {1} looks like a byhand package, but is in section {2}".format(self.filename, f.filename, f.section))

          return byhand

      @property
      def buildinfo_files(self):
          """included buildinfo files
          @type: list of L{daklib.upload.HashedFile}
          """
          return [f for f in self.files.values() if re_file_buildinfo.match(f.filename)]

      @property
      def binary_names(self):
          """names of included binary packages
          @type: list of str
          """
          return self.changes.get('Binary', '').split()

      @property
      def closed_bugs(self):
          """bugs closed by this upload
          @type: list of str
          """
          return self.changes.get('Closes', '').split()

      @property
      def files(self):
          """dict mapping filenames to L{daklib.upload.HashedFile} objects
          @type: dict
          """
          if self._files is None:
              self._files = parse_file_list(self.changes, True)
          return self._files

      @property
      def bytes(self):
          """total size of files included in this upload in bytes
          @type: number
          """
          return sum(f.size for f in self.files.values())

      def _key(self):
          """tuple used to compare two changes files

          We sort by source name and version first.  If these are identical,
          we sort changes that include source before those without source (so
          that sourceful uploads get processed first), and finally fall back
          to the filename (this should really never happen).

          @rtype:  tuple
          """
          return (
              self.changes.get('Source'),
              AptVersion(self.changes.get('Version', '')),
              not self.sourceful,
              self.filename
          )

      def __eq__(self, other):
          # Let Python handle comparisons with foreign types instead of
          # failing with AttributeError on other._key().
          if not isinstance(other, Changes):
              return NotImplemented
          return self._key() == other._key()

      def __lt__(self, other):
          if not isinstance(other, Changes):
              return NotImplemented
          return self._key() < other._key()
  class Binary:
      """Representation of a binary package

      @type hashed_file: HashedFile
      @ivar hashed_file: file object for the .deb

      @type control: dict-like
      @ivar control: dict to access fields in DEBIAN/control
      """

      def __init__(self, directory, hashed_file):
          """Read the control data of the package

          @type  directory: str
          @param directory: directory the package is located in

          @type  hashed_file: HashedFile
          @param hashed_file: file object for the package; its
                              C{input_filename} is opened
          """
          self.hashed_file = hashed_file

          deb_path = os.path.join(directory, hashed_file.input_filename)
          deb = apt_inst.DebFile(deb_path)
          control_data = deb.control.extractdata("control")
          self.control = apt_pkg.TagSection(control_data)

      @classmethod
      def from_file(cls, directory, filename):
          """create from an existing file, computing its hashes first

          @rtype: L{Binary}
          """
          return cls(directory, HashedFile.from_file(directory, filename))

      @property
      def source(self):
          """get tuple with source package name and version
          @type: tuple of str
          @raise InvalidBinaryException: Source field does not match C{re_field_source}
          """
          source_field = self.control.get("Source", None)
          if source_field is None:
              # No Source field: the package's own name and version are used.
              return (self.control["Package"], self.control["Version"])

          parsed = re_field_source.match(source_field)
          if not parsed:
              raise InvalidBinaryException('{0}: Invalid Source field.'.format(self.hashed_file.filename))

          source_version = parsed.group('version')
          if source_version is None:
              # Source field without version: fall back to the binary version.
              source_version = self.control['Version']
          return (parsed.group('package'), source_version)

      @property
      def name(self):
          """value of the Package field
          """
          return self.control['Package']

      @property
      def type(self):
          """package type ('deb' or 'udeb')
          @type: str
          @raise InvalidBinaryException: filename does not match C{re_file_binary}
          """
          parsed = re_file_binary.match(self.hashed_file.filename)
          if parsed:
              return parsed.group('type')
          raise InvalidBinaryException('{0}: Does not match re_file_binary'.format(self.hashed_file.filename))

      @property
      def component(self):
          """component name
          @type: str
          """
          # "component/section" gives the component; a bare section means main.
          parts = self.control['Section'].split('/')
          return parts[0] if len(parts) > 1 else "main"
  class Source:
      """Representation of a source package

      @type hashed_files: list of L{HashedFile}
      @ivar hashed_files: list of source files (including the .dsc itself)

      @ivar signature: L{SignedFile} created from the raw content of the .dsc

      @type dsc: dict-like
      @ivar dsc: dict to access fields in the .dsc file

      @type package_list: daklib.packagelist.PackageList
      @ivar package_list: Information about packages built by the source.
      """

      def __init__(self, directory, hashed_files, keyrings, require_signature=True):
          """Locate, verify and parse the .dsc among the given files

          @type  directory: str
          @param directory: directory the files are located in

          @type  hashed_files: list of L{HashedFile}
          @param hashed_files: source files (including the .dsc itself)

          @param keyrings: passed on to L{SignedFile}

          @param require_signature: passed on to L{SignedFile}

          @raise InvalidSourceException: no .dsc or more than one .dsc given
          """
          self.hashed_files = hashed_files

          self._dsc_file = None
          for candidate in hashed_files:
              if not re_file_dsc.match(candidate.filename):
                  continue
              if self._dsc_file is not None:
                  raise InvalidSourceException("Multiple .dsc found ({0} and {1})".format(self._dsc_file.filename, candidate.filename))
              self._dsc_file = candidate

          if self._dsc_file is None:
              raise InvalidSourceException("No .dsc included in source files")

          # The hash of the .dsc has to be valid before its content is used.
          self._dsc_file.check(directory)

          with open(os.path.join(directory, self._dsc_file.input_filename), 'rb') as fd:
              raw_dsc = fd.read()
          self.signature = SignedFile(raw_dsc, keyrings, require_signature)
          self.dsc = apt_pkg.TagSection(self.signature.contents)
          self.package_list = daklib.packagelist.PackageList(self.dsc)

          # Cache for the files property.
          self._files = None

      @classmethod
      def from_file(cls, directory, filename, keyrings, require_signature=True):
          """create from an existing .dsc file, computing its hashes first

          @rtype: L{Source}
          """
          dsc_file = HashedFile.from_file(directory, filename)
          return cls(directory, [dsc_file], keyrings, require_signature)

      @property
      def files(self):
          """dict mapping filenames to L{HashedFile} objects for additional source files

          This list does not include the .dsc itself.

          @type: dict
          """
          cached = self._files
          if cached is None:
              cached = self._files = parse_file_list(self.dsc, False)
          return cached

      @property
      def primary_fingerprint(self):
          """fingerprint of the key used to sign the .dsc
          @type: str
          """
          return self.signature.primary_fingerprint

      @property
      def valid_signature(self):
          """C{True} if the .dsc has a valid signature
          @type: bool
          """
          return self.signature.valid

      @property
      def weak_signature(self):
          """C{True} if the .dsc was signed using a weak algorithm
          @type: bool
          """
          return self.signature.weak_signature

      @property
      def component(self):
          """guessed component name

          Might be wrong. Don't rely on this.

          @type: str
          """
          if 'Section' in self.dsc:
              parts = self.dsc['Section'].split('/')
              if len(parts) > 1:
                  return parts[0]
          return "main"

      @property
      def filename(self):
          """filename of .dsc file
          @type: str
          """
          return self._dsc_file.filename