upload.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. # Copyright (C) 2012, Ansgar Burchardt <ansgar@debian.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 2 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License along
  14. # with this program; if not, write to the Free Software Foundation, Inc.,
  15. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  16. """module to handle uploads not yet installed to the archive
  17. This module provides classes to handle uploads not yet installed to the
  18. archive. Central is the :class:`Changes` class which represents a changes file.
  19. It provides methods to access the included binary and source packages.
  20. """
  21. import apt_inst
  22. import apt_pkg
  23. import errno
  24. import functools
  25. import os
  26. from collections.abc import Mapping
  27. from typing import Optional, TYPE_CHECKING
  28. from daklib.aptversion import AptVersion
  29. from daklib.gpg import SignedFile
  30. from daklib.regexes import *
  31. import daklib.dakapt
  32. import daklib.packagelist
  33. if TYPE_CHECKING:
  34. import datetime
  35. import re
  36. class UploadException(Exception):
  37. pass
  38. class InvalidChangesException(UploadException):
  39. pass
  40. class InvalidBinaryException(UploadException):
  41. pass
  42. class InvalidSourceException(UploadException):
  43. pass
  44. class InvalidHashException(UploadException):
  45. def __init__(self, filename: str, hash_name: str, expected, actual):
  46. self.filename = filename
  47. self.hash_name = hash_name
  48. self.expected = expected
  49. self.actual = actual
  50. def __str__(self):
  51. return ("Invalid {0} hash for {1}:\n"
  52. "According to the control file the {0} hash should be {2},\n"
  53. "but {1} has {3}.\n"
  54. "\n"
  55. "If you did not include {1} in your upload, a different version\n"
  56. "might already be known to the archive software.") \
  57. .format(self.hash_name, self.filename, self.expected, self.actual)
  58. class InvalidFilenameException(UploadException):
  59. def __init__(self, filename: str):
  60. self.filename: str = filename
  61. def __str__(self):
  62. return "Invalid filename '{0}'.".format(self.filename)
  63. class FileDoesNotExist(UploadException):
  64. def __init__(self, filename: str):
  65. self.filename = filename
  66. def __str__(self):
  67. return "Refers to non-existing file '{0}'".format(self.filename)
  68. class HashedFile:
  69. """file with checksums
  70. """
  71. def __init__(self, filename: str, size: int, md5sum: str, sha1sum: str, sha256sum: str, section: Optional[str] = None, priority: Optional[str] = None, input_filename: Optional[str] = None):
  72. self.filename: str = filename
  73. """name of the file"""
  74. if input_filename is None:
  75. input_filename = filename
  76. self.input_filename: str = input_filename
  77. """name of the file on disk
  78. Used for temporary files that should not be installed using their on-disk name.
  79. """
  80. self.size: int = size
  81. """size in bytes"""
  82. self.md5sum: str = md5sum
  83. """MD5 hash in hexdigits"""
  84. self.sha1sum: str = sha1sum
  85. """SHA1 hash in hexdigits"""
  86. self.sha256sum: str = sha256sum
  87. """SHA256 hash in hexdigits"""
  88. self.section: Optional[str] = section
  89. """section or :const:`None`"""
  90. self.priority: Optional[str] = priority
  91. """priority or :const:`None`"""
  92. @classmethod
  93. def from_file(cls, directory: str, filename: str, section: Optional[str] = None, priority: Optional[str] = None) -> 'HashedFile':
  94. """create with values for an existing file
  95. Create a :class:`HashedFile` object that refers to an already existing file.
  96. :param directory: directory the file is located in
  97. :param filename: filename
  98. :param section: optional section as given in .changes files
  99. :param priority: optional priority as given in .changes files
  100. :return: :class:`HashedFile` object for the given file
  101. """
  102. path = os.path.join(directory, filename)
  103. with open(path, 'r') as fh:
  104. size = os.fstat(fh.fileno()).st_size
  105. hashes = daklib.dakapt.DakHashes(fh)
  106. return cls(filename, size, hashes.md5, hashes.sha1, hashes.sha256, section, priority)
  107. def check(self, directory: str) -> None:
  108. """Validate hashes
  109. Check if size and hashes match the expected value.
  110. :param directory: directory the file is located in
  111. :raises InvalidHashException: if there is a hash mismatch
  112. """
  113. path = os.path.join(directory, self.input_filename)
  114. try:
  115. with open(path) as fh:
  116. self.check_fh(fh)
  117. except OSError as e:
  118. if e.errno == errno.ENOENT:
  119. raise FileDoesNotExist(self.input_filename)
  120. raise
  121. def check_fh(self, fh) -> None:
  122. size = os.fstat(fh.fileno()).st_size
  123. fh.seek(0)
  124. hashes = daklib.dakapt.DakHashes(fh)
  125. if size != self.size:
  126. raise InvalidHashException(self.filename, 'size', self.size, size)
  127. if hashes.md5 != self.md5sum:
  128. raise InvalidHashException(self.filename, 'md5sum', self.md5sum, hashes.md5)
  129. if hashes.sha1 != self.sha1sum:
  130. raise InvalidHashException(self.filename, 'sha1sum', self.sha1sum, hashes.sha1)
  131. if hashes.sha256 != self.sha256sum:
  132. raise InvalidHashException(self.filename, 'sha256sum', self.sha256sum, hashes.sha256)
  133. def parse_file_list(
  134. control: Mapping[str, str],
  135. has_priority_and_section: bool,
  136. safe_file_regexp: 're.Pattern' = re_file_safe,
  137. fields=('Files', 'Checksums-Sha1', 'Checksums-Sha256')
  138. ) -> dict[str, HashedFile]:
  139. """Parse Files and Checksums-* fields
  140. :param control: control file to take fields from
  141. :param has_priority_and_section: Files field include section and priority
  142. (as in .changes)
  143. :return: dict mapping filenames to :class:`HashedFile` objects
  144. :raises InvalidChangesException: missing fields or other grave errors
  145. """
  146. entries = {}
  147. for line in control.get(fields[0], "").split('\n'):
  148. if len(line) == 0:
  149. continue
  150. if has_priority_and_section:
  151. (md5sum, size, section, priority, filename) = line.split()
  152. entry = dict(md5sum=md5sum, size=int(size), section=section, priority=priority, filename=filename)
  153. else:
  154. (md5sum, size, filename) = line.split()
  155. entry = dict(md5sum=md5sum, size=int(size), filename=filename)
  156. entries[filename] = entry
  157. for line in control.get(fields[1], "").split('\n'):
  158. if len(line) == 0:
  159. continue
  160. (sha1sum, size, filename) = line.split()
  161. entry = entries.get(filename)
  162. if entry is None:
  163. raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[1], fields[0]))
  164. if entry is not None and entry.get('size', None) != int(size):
  165. raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[1]))
  166. entry['sha1sum'] = sha1sum
  167. for line in control.get(fields[2], "").split('\n'):
  168. if len(line) == 0:
  169. continue
  170. (sha256sum, size, filename) = line.split()
  171. entry = entries.get(filename)
  172. if entry is None:
  173. raise InvalidChangesException('{0} is listed in {1}, but not in {2}.'.format(filename, fields[2], fields[0]))
  174. if entry is not None and entry.get('size', None) != int(size):
  175. raise InvalidChangesException('Size for {0} in {1} and {2} fields differ.'.format(filename, fields[0], fields[2]))
  176. entry['sha256sum'] = sha256sum
  177. files = {}
  178. for entry in entries.values():
  179. filename = entry['filename']
  180. if 'size' not in entry:
  181. raise InvalidChangesException('No size for {0}.'.format(filename))
  182. if 'md5sum' not in entry:
  183. raise InvalidChangesException('No md5sum for {0}.'.format(filename))
  184. if 'sha1sum' not in entry:
  185. raise InvalidChangesException('No sha1sum for {0}.'.format(filename))
  186. if 'sha256sum' not in entry:
  187. raise InvalidChangesException('No sha256sum for {0}.'.format(filename))
  188. if safe_file_regexp is not None and not safe_file_regexp.match(filename):
  189. raise InvalidChangesException(f"References file with unsafe filename '{filename}'.")
  190. files[filename] = HashedFile(**entry)
  191. return files
  192. @functools.total_ordering
  193. class Changes:
  194. """Representation of a .changes file
  195. """
  196. def __init__(self, directory: str, filename: str, keyrings, require_signature: bool = True):
  197. if not re_file_safe.match(filename):
  198. raise InvalidChangesException('{0}: unsafe filename'.format(filename))
  199. self.directory: str = directory
  200. """directory the .changes is located in"""
  201. self.filename: str = filename
  202. """name of the .changes file"""
  203. with open(self.path, 'rb') as fd:
  204. data = fd.read()
  205. self.signature = SignedFile(data, keyrings, require_signature)
  206. self.changes: apt_pkg.TagSection = apt_pkg.TagSection(self.signature.contents)
  207. """dict to access fields of the .changes file"""
  208. self._binaries: 'Optional[list[Binary]]' = None
  209. self._source: 'Optional[Source]' = None
  210. self._files: Optional[dict[str, HashedFile]] = None
  211. self._keyrings = keyrings
  212. self._require_signature: bool = require_signature
  213. @property
  214. def path(self) -> str:
  215. """path to the .changes file"""
  216. return os.path.join(self.directory, self.filename)
  217. @property
  218. def primary_fingerprint(self) -> str:
  219. """fingerprint of the key used for signing the .changes file"""
  220. return self.signature.primary_fingerprint
  221. @property
  222. def valid_signature(self) -> bool:
  223. """:const:`True` if the .changes has a valid signature"""
  224. return self.signature.valid
  225. @property
  226. def weak_signature(self) -> bool:
  227. """:const:`True` if the .changes was signed using a weak algorithm"""
  228. return self.signature.weak_signature
  229. @property
  230. def signature_timestamp(self) -> 'datetime.datetime':
  231. return self.signature.signature_timestamp
  232. @property
  233. def contents_sha1(self) -> str:
  234. return self.signature.contents_sha1
  235. @property
  236. def architectures(self) -> list[str]:
  237. """list of architectures included in the upload"""
  238. return self.changes.get('Architecture', '').split()
  239. @property
  240. def distributions(self) -> list[str]:
  241. """list of target distributions for the upload"""
  242. return self.changes['Distribution'].split()
  243. @property
  244. def source(self) -> 'Optional[Source]':
  245. """included source or :const:`None`"""
  246. if self._source is None:
  247. source_files = []
  248. for f in self.files.values():
  249. if re_file_dsc.match(f.filename) or re_file_source.match(f.filename):
  250. source_files.append(f)
  251. if len(source_files) > 0:
  252. self._source = Source(self.directory, source_files, self._keyrings, self._require_signature)
  253. return self._source
  254. @property
  255. def sourceful(self) -> bool:
  256. """:const:`True` if the upload includes source"""
  257. return "source" in self.architectures
  258. @property
  259. def source_name(self) -> str:
  260. """source package name"""
  261. return re_field_source.match(self.changes['Source']).group('package')
  262. @property
  263. def binaries(self) -> 'list[Binary]':
  264. """included binary packages"""
  265. if self._binaries is None:
  266. self._binaries = [
  267. Binary(self.directory, f)
  268. for f in self.files.values()
  269. if re_file_binary.match(f.filename)
  270. ]
  271. return self._binaries
  272. @property
  273. def byhand_files(self) -> list[HashedFile]:
  274. """included byhand files"""
  275. byhand = []
  276. for f in self.files.values():
  277. if f.section == 'byhand' or f.section[:4] == 'raw-':
  278. byhand.append(f)
  279. continue
  280. if re_file_dsc.match(f.filename) or re_file_source.match(f.filename) or re_file_binary.match(f.filename):
  281. continue
  282. if re_file_buildinfo.match(f.filename):
  283. continue
  284. raise InvalidChangesException("{0}: {1} looks like a byhand package, but is in section {2}".format(self.filename, f.filename, f.section))
  285. return byhand
  286. @property
  287. def buildinfo_files(self) -> list[HashedFile]:
  288. """included buildinfo files"""
  289. return [
  290. f for f in self.files.values()
  291. if re_file_buildinfo.match(f.filename)
  292. ]
  293. @property
  294. def binary_names(self) -> list[str]:
  295. """names of included binary packages"""
  296. return self.changes.get('Binary', '').split()
  297. @property
  298. def closed_bugs(self) -> list[str]:
  299. """bugs closed by this upload"""
  300. return self.changes.get('Closes', '').split()
  301. @property
  302. def files(self) -> dict[str, HashedFile]:
  303. """dict mapping filenames to :class:`HashedFile` objects"""
  304. if self._files is None:
  305. self._files = parse_file_list(self.changes, True)
  306. return self._files
  307. @property
  308. def bytes(self) -> int:
  309. """total size of files included in this upload in bytes"""
  310. return sum(f.size for f in self.files.values())
  311. def _key(self) -> tuple[str, AptVersion, bool, str]:
  312. """tuple used to compare two changes files
  313. We sort by source name and version first. If these are identical,
  314. we sort changes that include source before those without source (so
  315. that sourceful uploads get processed first), and finally fall back
  316. to the filename (this should really never happen).
  317. """
  318. return (
  319. self.changes.get('Source', ''),
  320. AptVersion(self.changes.get('Version', '')),
  321. not self.sourceful,
  322. self.filename
  323. )
  324. def __eq__(self, other: object) -> bool:
  325. if not isinstance(other, Changes):
  326. return NotImplemented
  327. return self._key() == other._key()
  328. def __lt__(self, other: 'Changes') -> bool:
  329. return self._key() < other._key()
  330. class Binary:
  331. """Representation of a binary package
  332. """
  333. def __init__(self, directory: str, hashed_file: HashedFile):
  334. self.hashed_file: HashedFile = hashed_file
  335. """file object for the .deb"""
  336. path = os.path.join(directory, hashed_file.input_filename)
  337. data = apt_inst.DebFile(path).control.extractdata("control")
  338. self.control: apt_pkg.TagSection = apt_pkg.TagSection(data)
  339. """dict to access fields in DEBIAN/control"""
  340. @classmethod
  341. def from_file(cls, directory, filename) -> 'Binary':
  342. hashed_file = HashedFile.from_file(directory, filename)
  343. return cls(directory, hashed_file)
  344. @property
  345. def source(self) -> tuple[str, str]:
  346. """get tuple with source package name and version"""
  347. source = self.control.get("Source", None)
  348. if source is None:
  349. return (self.control["Package"], self.control["Version"])
  350. match = re_field_source.match(source)
  351. if not match:
  352. raise InvalidBinaryException('{0}: Invalid Source field.'.format(self.hashed_file.filename))
  353. version = match.group('version')
  354. if version is None:
  355. version = self.control['Version']
  356. return (match.group('package'), version)
  357. @property
  358. def name(self) -> str:
  359. return self.control['Package']
  360. @property
  361. def type(self) -> str:
  362. """package type ('deb' or 'udeb')"""
  363. match = re_file_binary.match(self.hashed_file.filename)
  364. if not match:
  365. raise InvalidBinaryException('{0}: Does not match re_file_binary'.format(self.hashed_file.filename))
  366. return match.group('type')
  367. @property
  368. def component(self) -> str:
  369. """component name"""
  370. fields = self.control['Section'].split('/')
  371. if len(fields) > 1:
  372. return fields[0]
  373. return "main"
  374. class Source:
  375. """Representation of a source package
  376. """
  377. def __init__(self, directory: str, hashed_files: list[HashedFile], keyrings, require_signature=True):
  378. self.hashed_files: list[HashedFile] = hashed_files
  379. """list of source files (including the .dsc itself)"""
  380. dsc_file = None
  381. for f in hashed_files:
  382. if re_file_dsc.match(f.filename):
  383. if dsc_file is not None:
  384. raise InvalidSourceException("Multiple .dsc found ({0} and {1})".format(self._dsc_file.filename, f.filename))
  385. else:
  386. dsc_file = f
  387. if dsc_file is None:
  388. raise InvalidSourceException("No .dsc included in source files")
  389. self._dsc_file: HashedFile = dsc_file
  390. # make sure the hash for the dsc is valid before we use it
  391. self._dsc_file.check(directory)
  392. dsc_file_path = os.path.join(directory, self._dsc_file.input_filename)
  393. with open(dsc_file_path, 'rb') as fd:
  394. data = fd.read()
  395. self.signature = SignedFile(data, keyrings, require_signature)
  396. self.dsc: Mapping[str, str] = apt_pkg.TagSection(self.signature.contents)
  397. """dict to access fields in the .dsc file"""
  398. self.package_list: daklib.packagelist.PackageList = daklib.packagelist.PackageList(self.dsc)
  399. """Information about packages built by the source."""
  400. self._files: Optional[dict[str, HashedFile]] = None
  401. @classmethod
  402. def from_file(cls, directory, filename, keyrings, require_signature=True) -> 'Source':
  403. hashed_file = HashedFile.from_file(directory, filename)
  404. return cls(directory, [hashed_file], keyrings, require_signature)
  405. @property
  406. def files(self) -> dict[str, HashedFile]:
  407. """dict mapping filenames to :class:`HashedFile` objects for additional source files
  408. This list does not include the .dsc itself.
  409. """
  410. if self._files is None:
  411. self._files = parse_file_list(self.dsc, False)
  412. return self._files
  413. @property
  414. def primary_fingerprint(self) -> str:
  415. """fingerprint of the key used to sign the .dsc"""
  416. return self.signature.primary_fingerprint
  417. @property
  418. def valid_signature(self) -> bool:
  419. """:const:`True` if the .dsc has a valid signature"""
  420. return self.signature.valid
  421. @property
  422. def weak_signature(self) -> bool:
  423. """:const:`True` if the .dsc was signed using a weak algorithm"""
  424. return self.signature.weak_signature
  425. @property
  426. def component(self) -> str:
  427. """guessed component name
  428. Might be wrong. Don't rely on this.
  429. """
  430. if 'Section' not in self.dsc:
  431. return 'main'
  432. fields = self.dsc['Section'].split('/')
  433. if len(fields) > 1:
  434. return fields[0]
  435. return "main"
  436. @property
  437. def filename(self) -> str:
  438. """filename of .dsc file"""
  439. return self._dsc_file.filename