retrieve_data.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069
  1. # Copyright 2013 The Distro Tracker Developers
  2. # See the COPYRIGHT file at the top-level directory of this distribution and
  3. # at http://deb.li/DTAuthors
  4. #
  5. # This file is part of Distro Tracker. It is subject to the license terms
  6. # in the LICENSE file found in the top-level directory of this
  7. # distribution and at http://deb.li/DTLicense. No part of Distro Tracker,
  8. # including this file, may be copied, modified, propagated, or distributed
  9. # except according to the terms contained in the LICENSE file.
  10. """Implements core data retrieval from various external resources."""
  11. from __future__ import unicode_literals
  12. from distro_tracker import vendor
  13. from distro_tracker.core.models import PseudoPackageName, PackageName
  14. from distro_tracker.core.models import Repository
  15. from distro_tracker.core.models import SourcePackageRepositoryEntry
  16. from distro_tracker.core.models import BinaryPackageRepositoryEntry
  17. from distro_tracker.core.models import ContributorName
  18. from distro_tracker.core.models import SourcePackage
  19. from distro_tracker.core.models import Team
  20. from distro_tracker.core.models import PackageExtractedInfo
  21. from distro_tracker.core.models import BinaryPackageName
  22. from distro_tracker.core.models import BinaryPackage
  23. from distro_tracker.core.models import SourcePackageDeps
  24. from distro_tracker.core.utils.packages import (
  25. extract_information_from_sources_entry,
  26. extract_information_from_packages_entry,
  27. AptCache)
  28. from distro_tracker.core.tasks import BaseTask
  29. from distro_tracker.core.tasks import clear_all_events_on_exception
  30. from distro_tracker.core.models import SourcePackageName, Architecture
  31. from distro_tracker.accounts.models import UserEmail
  32. from django.utils.six import reraise
  33. from django.db import transaction
  34. from django.db import models
  35. from debian import deb822
  36. import re
  37. import sys
  38. import requests
  39. import itertools
  40. import logging
  41. logger = logging.getLogger('distro_tracker.tasks')
  42. class InvalidRepositoryException(Exception):
  43. pass
  44. def update_pseudo_package_list():
  45. """
  46. Retrieves the list of all allowed pseudo packages and updates the stored
  47. list if necessary.
  48. Uses a vendor-provided function
  49. :func:`get_pseudo_package_list
  50. <distro_tracker.vendor.skeleton.rules.get_pseudo_package_list>`
  51. to get the list of currently available pseudo packages.
  52. """
  53. try:
  54. pseudo_packages, implemented = vendor.call('get_pseudo_package_list')
  55. except:
  56. # Error accessing pseudo package resource: do not update the list
  57. return
  58. if not implemented or pseudo_packages is None:
  59. return
  60. # Faster lookups than if this were a list
  61. pseudo_packages = set(pseudo_packages)
  62. for existing_package in PseudoPackageName.objects.all():
  63. if existing_package.name not in pseudo_packages:
  64. # Existing packages which are no longer considered pseudo packages
  65. # are demoted -- losing their pseudo package flag.
  66. existing_package.pseudo = False
  67. existing_package.save()
  68. else:
  69. # If an existing package remained a pseudo package there will be no
  70. # action required so it is removed from the set.
  71. pseudo_packages.remove(existing_package.name)
  72. # The left over packages in the set are the ones that do not exist.
  73. for package_name in pseudo_packages:
  74. PseudoPackageName.objects.create(name=package_name)
  75. def retrieve_repository_info(sources_list_entry):
  76. """
  77. A function which accesses a ``Release`` file for the given repository and
  78. returns a dict representing the parsed information.
  79. :rtype: dict
  80. """
  81. entry_split = sources_list_entry.split(None, 3)
  82. if len(entry_split) < 3:
  83. raise InvalidRepositoryException("Invalid sources.list entry")
  84. repository_type, url, distribution = entry_split[:3]
  85. # Access the Release file
  86. try:
  87. response = requests.get(Repository.release_file_url(url, distribution),
  88. allow_redirects=True)
  89. except requests.exceptions.RequestException as original:
  90. reraise(
  91. InvalidRepositoryException,
  92. InvalidRepositoryException(
  93. "Could not connect to {url}\n{original}".format(
  94. url=url,
  95. original=original)
  96. ),
  97. sys.exc_info()[2]
  98. )
  99. if response.status_code != 200:
  100. raise InvalidRepositoryException(
  101. "No Release file found at the URL: {url}\n"
  102. "Response status code {status_code}".format(
  103. url=url, status_code=response.status_code))
  104. # Parse the retrieved information
  105. release = deb822.Release(response.text)
  106. if not release:
  107. raise InvalidRepositoryException(
  108. "No data could be extracted from the Release file at {url}".format(
  109. url=url))
  110. REQUIRED_KEYS = (
  111. 'architectures',
  112. 'components',
  113. )
  114. # A mapping of optional keys to their default values, if any
  115. OPTIONAL_KEYS = {
  116. 'suite': distribution,
  117. 'codename': None,
  118. }
  119. # Make sure all necessary keys were found in the file
  120. for key in REQUIRED_KEYS:
  121. if key not in release:
  122. raise InvalidRepositoryException(
  123. "Property {key} not found in the Release file at {url}".format(
  124. key=key,
  125. url=url))
  126. # Finally build the return dictionary with the information about the
  127. # repository.
  128. repository_information = {
  129. 'uri': url,
  130. 'architectures': release['architectures'].split(),
  131. 'components': release['components'].split(),
  132. 'binary': repository_type == 'deb',
  133. 'source': repository_type == 'deb-src',
  134. }
  135. # Add in optional info
  136. for key, default in OPTIONAL_KEYS.items():
  137. repository_information[key] = release.get(key, default)
  138. return repository_information
  139. class PackageUpdateTask(BaseTask):
  140. """
  141. A subclass of the :class:`BaseTask <distro_tracker.core.tasks.BaseTask>`
  142. providing some methods specific to tasks dealing with package updates.
  143. """
  144. def __init__(self, force_update=False, *args, **kwargs):
  145. super(PackageUpdateTask, self).__init__(*args, **kwargs)
  146. self.force_update = force_update
  147. def set_parameters(self, parameters):
  148. if 'force_update' in parameters:
  149. self.force_update = parameters['force_update']
  150. class UpdateRepositoriesTask(PackageUpdateTask):
  151. """
  152. Performs an update of repository information.
  153. New (source and binary) packages are created if necessary and old ones are
  154. deleted. An event is emitted for each situation, allowing other tasks to
  155. perform updates based on updated package information.
  156. """
  157. PRODUCES_EVENTS = (
  158. 'new-source-package',
  159. 'new-source-package-version',
  160. 'new-source-package-in-repository',
  161. 'new-source-package-version-in-repository',
  162. 'new-binary-package',
  163. # Source package no longer found in any repository
  164. 'lost-source-package',
  165. # Source package version no longer found in the given repository
  166. 'lost-source-package-version-in-repository',
  167. # A particular version of a source package no longer found in any repo
  168. 'lost-version-of-source-package',
  169. # Binary package name no longer used by any source package
  170. 'lost-binary-package',
  171. )
  172. SOURCE_DEPENDENCY_TYPES = ('Build-Depends', 'Build-Depends-Indep')
  173. BINARY_DEPENDENCY_TYPES = ('Depends', 'Recommends', 'Suggests')
  174. def __init__(self, *args, **kwargs):
  175. super(UpdateRepositoriesTask, self).__init__(*args, **kwargs)
  176. self._all_packages = []
  177. self._all_repository_entries = []
  178. def _clear_processed_repository_entries(self):
  179. self._all_repository_entries = []
  180. def _add_processed_repository_entry(self, repository_entry):
  181. self._all_repository_entries.append(repository_entry.id)
  182. def _extract_information_from_sources_entry(self, src_pkg, stanza):
  183. entry = extract_information_from_sources_entry(stanza)
  184. # Convert the parsed data into corresponding model instances
  185. if 'architectures' in entry:
  186. # Map the list of architecture names to their objects
  187. # Discards any unknown architectures.
  188. entry['architectures'] = Architecture.objects.filter(
  189. name__in=entry['architectures'])
  190. if 'binary_packages' in entry:
  191. # Map the list of binary package names to list of existing
  192. # binary package names.
  193. binary_package_names = entry['binary_packages']
  194. existing_binaries_qs = BinaryPackageName.objects.filter(
  195. name__in=binary_package_names)
  196. existing_binaries_names = []
  197. binaries = []
  198. for binary in existing_binaries_qs:
  199. binaries.append(binary)
  200. existing_binaries_names.append(binary.name)
  201. for binary_name in binary_package_names:
  202. if binary_name not in existing_binaries_names:
  203. binary_package_name, _ = PackageName.objects.get_or_create(
  204. name=binary_name)
  205. binary_package_name.binary = True
  206. binary_package_name.save()
  207. binary_package_name = BinaryPackageName.objects.get(
  208. name=binary_name)
  209. binaries.append(binary_package_name)
  210. self.raise_event('new-binary-package', {
  211. 'name': binary_name,
  212. })
  213. entry['binary_packages'] = binaries
  214. if 'maintainer' in entry:
  215. maintainer_email, _ = UserEmail.objects.get_or_create(
  216. email=entry['maintainer']['email'])
  217. maintainer = ContributorName.objects.get_or_create(
  218. contributor_email=maintainer_email,
  219. name=entry['maintainer'].get('name', ''))[0]
  220. entry['maintainer'] = maintainer
  221. if 'uploaders' in entry:
  222. uploader_emails = [
  223. uploader['email']
  224. for uploader in entry['uploaders']
  225. ]
  226. uploader_names = [
  227. uploader.get('name', '')
  228. for uploader in entry['uploaders']
  229. ]
  230. existing_contributor_emails_qs = UserEmail.objects.filter(
  231. email__in=uploader_emails)
  232. existing_contributor_emails = {
  233. contributor.email: contributor
  234. for contributor in existing_contributor_emails_qs
  235. }
  236. uploaders = []
  237. for email, name in zip(uploader_emails, uploader_names):
  238. if email not in existing_contributor_emails:
  239. contributor_email, _ = UserEmail.objects.get_or_create(
  240. email=email)
  241. existing_contributor_emails[email] = contributor_email
  242. else:
  243. contributor_email = existing_contributor_emails[email]
  244. uploaders.append(ContributorName.objects.get_or_create(
  245. contributor_email=contributor_email,
  246. name=name)[0]
  247. )
  248. entry['uploaders'] = uploaders
  249. return entry
  250. def _extract_information_from_packages_entry(self, bin_pkg, stanza):
  251. entry = extract_information_from_packages_entry(stanza)
  252. return entry
  253. def _update_sources_file(self, repository, sources_file):
  254. for stanza in deb822.Sources.iter_paragraphs(sources_file):
  255. allow, implemented = vendor.call('allow_package', stanza)
  256. if allow is not None and implemented and not allow:
  257. # The vendor-provided function indicates that the package
  258. # should not be included
  259. continue
  260. src_pkg_name, created = SourcePackageName.objects.get_or_create(
  261. name=stanza['package']
  262. )
  263. if created:
  264. self.raise_event('new-source-package', {
  265. 'name': src_pkg_name.name
  266. })
  267. src_pkg, created_new_version = SourcePackage.objects.get_or_create(
  268. source_package_name=src_pkg_name,
  269. version=stanza['version']
  270. )
  271. if created_new_version or self.force_update:
  272. if created_new_version:
  273. self.raise_event('new-source-package-version', {
  274. 'name': src_pkg.name,
  275. 'version': src_pkg.version,
  276. 'pk': src_pkg.pk,
  277. })
  278. # Extract package data from Sources
  279. entry = self._extract_information_from_sources_entry(
  280. src_pkg, stanza)
  281. # Update the source package information based on the newly
  282. # extracted data.
  283. src_pkg.update(**entry)
  284. src_pkg.save()
  285. if not repository.has_source_package(src_pkg):
  286. # Does it have any version of the package?
  287. if not repository.has_source_package_name(src_pkg.name):
  288. self.raise_event('new-source-package-in-repository', {
  289. 'name': src_pkg.name,
  290. 'repository': repository.name,
  291. })
  292. # Add it to the repository
  293. kwargs = {
  294. 'priority': stanza.get('priority', ''),
  295. 'section': stanza.get('section', ''),
  296. }
  297. entry = repository.add_source_package(src_pkg, **kwargs)
  298. self.raise_event('new-source-package-version-in-repository', {
  299. 'name': src_pkg.name,
  300. 'version': src_pkg.version,
  301. 'repository': repository.name,
  302. })
  303. else:
  304. # We get the entry to mark that the package version is still in
  305. # the repository.
  306. entry = SourcePackageRepositoryEntry.objects.get(
  307. repository=repository,
  308. source_package=src_pkg
  309. )
  310. self._add_processed_repository_entry(entry)
  311. def get_source_for_binary(self, stanza):
  312. """
  313. :param stanza: a ``Packages`` file entry
  314. :returns: A ``(source_name, source_version)`` pair for the binary
  315. package described by the entry
  316. """
  317. source_name = (
  318. stanza['source']
  319. if 'source' in stanza else
  320. stanza['package'])
  321. # Extract the source version, if given in the Source field
  322. match = re.match(r'(.+) \((.+)\)', source_name)
  323. if match:
  324. source_name, source_version = match.group(1), match.group(2)
  325. else:
  326. source_version = stanza['version']
  327. return source_name, source_version
  328. def _update_packages_file(self, repository, packages_file):
  329. for stanza in deb822.Packages.iter_paragraphs(packages_file):
  330. bin_pkg_name, created = BinaryPackageName.objects.get_or_create(
  331. name=stanza['package']
  332. )
  333. # Find the matching SourcePackage for the binary package
  334. source_name, source_version = self.get_source_for_binary(stanza)
  335. src_pkg, _ = SourcePackage.objects.get_or_create(
  336. source_package_name=SourcePackageName.objects.get_or_create(
  337. name=source_name)[0],
  338. version=source_version)
  339. bin_pkg, created_new_version = BinaryPackage.objects.get_or_create(
  340. binary_package_name=bin_pkg_name,
  341. version=stanza['version'],
  342. source_package=src_pkg
  343. )
  344. if created_new_version:
  345. # Since it's a new version, extract package data from Packages
  346. entry = self._extract_information_from_packages_entry(
  347. bin_pkg, stanza)
  348. # Update the binary package information based on the newly
  349. # extracted data.
  350. bin_pkg.update(**entry)
  351. bin_pkg.save()
  352. if not repository.has_binary_package(bin_pkg):
  353. # Add it to the repository
  354. architecture, _ = Architecture.objects.get_or_create(
  355. name=stanza['architecture'])
  356. kwargs = {
  357. 'priority': stanza.get('priority', ''),
  358. 'section': stanza.get('section', ''),
  359. 'architecture': architecture,
  360. }
  361. entry = repository.add_binary_package(bin_pkg, **kwargs)
  362. else:
  363. # We get the entry to mark that the package version is still in
  364. # the repository.
  365. entry = BinaryPackageRepositoryEntry.objects.get(
  366. repository=repository,
  367. binary_package=bin_pkg)
  368. self._add_processed_repository_entry(entry)
  369. def _remove_query_set_if_count_zero(self, qs, count_field,
  370. event_generator=None):
  371. """
  372. Removes elements from the given query set if their count of the given
  373. ``count_field`` is ``0``.
  374. :param qs: Instances which should be deleted in case their count of the
  375. field ``count_field`` is 0.
  376. :type qs: :class:`QuerySet <django.db.models.query.QuerySet>`
  377. :param count_field: Each instance in ``qs`` that has a 0 count for the
  378. field with this name is deleted.
  379. :type count_field: string
  380. :param event_generator: A ``callable`` which returns a
  381. ``(name, arguments)`` pair describing the event which should be
  382. raised based on the model instance given to it as an argument.
  383. :type event_generator: ``callable``
  384. """
  385. qs = qs.annotate(count=models.Count(count_field))
  386. qs = qs.filter(count=0)
  387. if event_generator:
  388. for item in qs:
  389. self.raise_event(*event_generator(item))
  390. qs.delete()
  391. def _remove_obsolete_packages(self):
  392. self.log("Removing obsolete source packages")
  393. # Clean up package versions which no longer exist in any repository.
  394. self._remove_query_set_if_count_zero(
  395. SourcePackage.objects.all(),
  396. 'repository',
  397. lambda source_package: (
  398. 'lost-version-of-source-package', {
  399. 'name': source_package.name,
  400. 'version': source_package.version,
  401. }
  402. )
  403. )
  404. # Clean up names which no longer exist.
  405. self._remove_query_set_if_count_zero(
  406. SourcePackageName.objects.all(),
  407. 'source_package_versions',
  408. lambda package: (
  409. 'lost-source-package', {
  410. 'name': package.name,
  411. }
  412. )
  413. )
  414. # Clean up binary package names which are no longer used by any source
  415. # package.
  416. self._remove_query_set_if_count_zero(
  417. BinaryPackageName.objects.all(),
  418. 'sourcepackage',
  419. lambda binary_package_name: (
  420. 'lost-binary-package', {
  421. 'name': binary_package_name.name,
  422. }
  423. )
  424. )
  425. def _update_repository_entries(self, all_entries_qs, event_generator=None):
  426. """
  427. Removes all repository entries which are no longer found in the
  428. repository after the last update.
  429. If the ``event_generator`` argument is provided, an event returned by
  430. the function is raised for each removed entry.
  431. :param all_entries_qs: All currently existing entries which should be
  432. filtered to only contain the ones still found after the update.
  433. :type all_entries_qs:
  434. :class:`QuerySet <django.db.models.query.QuerySet>`
  435. :event_generator: Takes a repository entry as a parameter and returns a
  436. two-tuple of ``(event_name, event_arguments)``. An event with the
  437. return parameters is raised by the function for each removed entry.
  438. :type event_generator: callable
  439. """
  440. # Out of all entries in this repository, only those found in
  441. # the last update need to stay, so exclude them from the delete
  442. all_entries_qs = all_entries_qs.exclude(
  443. id__in=self._all_repository_entries)
  444. # Emit events for all packages that were removed from the repository
  445. if event_generator:
  446. for entry in all_entries_qs:
  447. self.raise_event(*event_generator(entry))
  448. all_entries_qs.delete()
  449. self._clear_processed_repository_entries()
  450. def extract_package_versions(self, file_name):
  451. """
  452. :param file_name: The name of the file from which package versions
  453. should be extracted.
  454. :type file_name: string
  455. :returns: A dict mapping package names to a list of versions found in
  456. Deb822 formatted file.
  457. """
  458. with open(file_name, 'r') as packages_file:
  459. packages = {}
  460. for stanza in deb822.Deb822.iter_paragraphs(packages_file):
  461. package_name, version = stanza['package'], stanza['version']
  462. packages.setdefault(package_name, [])
  463. packages[package_name].append(version)
  464. return packages
  465. def _mark_file_not_processed(self, repository, file_name, entry_manager):
  466. """
  467. The given ``Sources`` or ``Packages`` file has not been changed in the
  468. last update. This method marks all package versions found in it as
  469. still existing in order to avoid deleting them.
  470. :param repository: The repository to which the file is associated
  471. :type repository:
  472. :class:`Repository <distro_tracker.core.models.Repository>`
  473. :param file_name: The name of the file whose packages should be saved
  474. :param entry_manager: The manager instance which handles the package
  475. entries.
  476. :type entry_manager: :class:`Manager <django.db.models.Manager>`
  477. """
  478. # Extract all package versions from the file
  479. packages = self.extract_package_versions(file_name)
  480. # Only issue one DB query to retrieve the entries for packages with
  481. # the given names
  482. repository_entries = \
  483. entry_manager.filter_by_package_name(packages.keys())
  484. repository_entries = repository_entries.filter(
  485. repository=repository)
  486. repository_entries = repository_entries.select_related()
  487. # For each of those entries, make sure to keep only the ones
  488. # corresponding to the version found in the sources file
  489. for entry in repository_entries:
  490. if entry.version in packages[entry.name]:
  491. self._add_processed_repository_entry(entry)
  492. def group_files_by_repository(self, cached_files):
  493. """
  494. :param cached_files: A list of ``(repository, file_name)`` pairs
  495. :returns: A dict mapping repositories to all file names found for that
  496. repository.
  497. """
  498. repository_files = {}
  499. for repository, file_name in cached_files:
  500. repository_files.setdefault(repository, [])
  501. repository_files[repository].append(file_name)
  502. return repository_files
  503. def update_sources_files(self, updated_sources):
  504. """
  505. Performs an update of tracked packages based on the updated Sources
  506. files.
  507. :param updated_sources: A list of ``(repository, sources_file_name)``
  508. pairs giving the Sources files which were updated and should be
  509. used to update the Distro Tracker tracked information too.
  510. """
  511. # Group all files by repository to which they belong
  512. repository_files = self.group_files_by_repository(updated_sources)
  513. for repository, sources_files in repository_files.items():
  514. with transaction.atomic():
  515. self.log("Processing Sources files of %s repository",
  516. repository.shorthand)
  517. # First update package information based on updated files
  518. for sources_file in sources_files:
  519. with open(sources_file) as sources_fd:
  520. self._update_sources_file(repository, sources_fd)
  521. # Mark package versions found in un-updated files as still
  522. # existing
  523. all_sources = \
  524. self.apt_cache.get_sources_files_for_repository(repository)
  525. for sources_file in all_sources:
  526. if sources_file not in sources_files:
  527. self._mark_file_not_processed(
  528. repository,
  529. sources_file,
  530. SourcePackageRepositoryEntry.objects)
  531. # When all the files for the repository are handled, update
  532. # which packages are still found in it.
  533. self._update_repository_entries(
  534. SourcePackageRepositoryEntry.objects.filter(
  535. repository=repository),
  536. lambda entry: (
  537. 'lost-source-package-version-in-repository', {
  538. 'name': entry.source_package.name,
  539. 'version': entry.source_package.version,
  540. 'repository': entry.repository.name,
  541. })
  542. )
  543. with transaction.atomic():
  544. # When all repositories are handled, update which packages are
  545. # still found in at least one repository.
  546. self._remove_obsolete_packages()
  547. def update_packages_files(self, updated_packages):
  548. """
  549. Performs an update of tracked packages based on the updated Packages
  550. files.
  551. :param updated_sources: A list of ``(repository, packages_file_name)``
  552. pairs giving the Packages files which were updated and should be
  553. used to update the Distro Tracker tracked information too.
  554. """
  555. # Group all files by repository to which they belong
  556. repository_files = self.group_files_by_repository(updated_packages)
  557. for repository, packages_files in repository_files.items():
  558. self.log("Processing Packages files of %s repository",
  559. repository.shorthand)
  560. # First update package information based on updated files
  561. for packages_file in packages_files:
  562. with open(packages_file) as packages_fd:
  563. self._update_packages_file(repository, packages_fd)
  564. # Mark package versions found in un-updated files as still existing
  565. all_sources = \
  566. self.apt_cache.get_packages_files_for_repository(repository)
  567. for packages_file in all_sources:
  568. if packages_file not in packages_files:
  569. self._mark_file_not_processed(
  570. repository, packages_file,
  571. BinaryPackageRepositoryEntry.objects)
  572. # When all the files for the repository are handled, update
  573. # which packages are still found in it.
  574. self._update_repository_entries(
  575. BinaryPackageRepositoryEntry.objects.filter(
  576. repository=repository))
  577. def _update_dependencies_for_source(self,
  578. stanza,
  579. dependency_types):
  580. """
  581. Updates the dependencies for a source package based on the ones found
  582. in the given ``Packages`` or ``Sources`` stanza.
  583. :param source_name: The name of the source package for which the
  584. dependencies are updated.
  585. :param stanza: The ``Packages`` or ``Sources`` entry
  586. :param dependency_type: A list of dependency types which should be
  587. considered (e.g. Build-Depends, Recommends, etc.)
  588. :param source_to_binary_deps: The dictionary which should be updated
  589. with the new dependencies. Maps source names to a list of dicts
  590. each describing a dependency.
  591. """
  592. binary_dependencies = []
  593. for dependency_type in dependency_types:
  594. # The Deb822 instance is case sensitive when it comes to relations
  595. dependencies = stanza.relations.get(dependency_type.lower(), ())
  596. for dependency in itertools.chain(*dependencies):
  597. binary_name = dependency['name']
  598. binary_dependencies.append({
  599. 'dependency_type': dependency_type,
  600. 'binary': binary_name,
  601. })
  602. return binary_dependencies
  603. def _process_source_to_binary_deps(self, source_to_binary_deps, all_sources,
  604. bin_to_src, default_repository):
  605. dependency_instances = []
  606. for source_name, dependencies in source_to_binary_deps.items():
  607. if source_name not in all_sources:
  608. continue
  609. # All dependencies for the current source package.
  610. all_dependencies = {}
  611. for dependency in dependencies:
  612. binary_name = dependency['binary']
  613. dependency_type = dependency.pop('dependency_type')
  614. if binary_name not in bin_to_src:
  615. continue
  616. for source_dependency in bin_to_src[binary_name]:
  617. if source_name == source_dependency:
  618. continue
  619. source_dependencies = \
  620. all_dependencies.setdefault(source_dependency, {})
  621. source_dependencies.setdefault(dependency_type, [])
  622. if dependency not in source_dependencies[dependency_type]:
  623. source_dependencies[dependency_type].append(dependency)
  624. # Create the dependency instances for the current source package.
  625. for dependency_name, details in all_dependencies.items():
  626. if dependency_name in all_sources:
  627. build_dep = any(dependency_type in details
  628. for dependency_type
  629. in self.SOURCE_DEPENDENCY_TYPES)
  630. binary_dep = any(dependency_type in details
  631. for dependency_type
  632. in self.BINARY_DEPENDENCY_TYPES)
  633. dependency_instances.append(
  634. SourcePackageDeps(
  635. source=all_sources[source_name],
  636. dependency=all_sources[dependency_name],
  637. build_dep=build_dep,
  638. binary_dep=binary_dep,
  639. repository=default_repository,
  640. details=details))
  641. return dependency_instances
  642. def update_dependencies(self):
  643. """
  644. Updates source-to-source package dependencies stemming from
  645. build dependencies and their binary packages' dependencies.
  646. """
  647. # Build the dependency mapping
  648. try:
  649. default_repository = Repository.objects.get(default=True)
  650. except Repository.DoesNotExist:
  651. self.log("No default repository, no dependencies created.",
  652. level=logging.WARNING)
  653. return
  654. self.log("Parsing files to discover dependencies")
  655. sources_files = self.apt_cache.get_sources_files_for_repository(
  656. default_repository)
  657. packages_files = self.apt_cache.get_packages_files_for_repository(
  658. default_repository)
  659. bin_to_src = {}
  660. source_to_binary_deps = {}
  661. # First builds a list of binary dependencies of all source packages
  662. # based on the Sources file.
  663. for sources_file in sources_files:
  664. with open(sources_file) as sources_fd:
  665. for stanza in deb822.Sources.iter_paragraphs(sources_fd):
  666. source_name = stanza['package']
  667. for binary in itertools.chain(*stanza.relations['binary']):
  668. sources_set = bin_to_src.setdefault(binary['name'],
  669. set())
  670. sources_set.add(source_name)
  671. dependencies = source_to_binary_deps.setdefault(source_name,
  672. [])
  673. dependencies.extend(self._update_dependencies_for_source(
  674. stanza,
  675. self.SOURCE_DEPENDENCY_TYPES))
  676. # Then a list of binary dependencies based on the Packages file.
  677. for packages_file in packages_files:
  678. with open(packages_file) as packages_fd:
  679. for stanza in deb822.Packages.iter_paragraphs(packages_fd):
  680. binary_name = stanza['package']
  681. source_name, source_version = \
  682. self.get_source_for_binary(stanza)
  683. sources_set = bin_to_src.setdefault(binary_name, set())
  684. sources_set.add(source_name)
  685. new_dependencies = self._update_dependencies_for_source(
  686. stanza,
  687. self.BINARY_DEPENDENCY_TYPES)
  688. for dependency in new_dependencies:
  689. dependency['source_binary'] = binary_name
  690. dependencies = source_to_binary_deps.setdefault(source_name,
  691. [])
  692. dependencies.extend(new_dependencies)
  693. # The binary packages are matched with their source packages and each
  694. # source to source dependency created.
  695. all_sources = {
  696. source.name: source
  697. for source in SourcePackageName.objects.all()
  698. }
  699. self.log("Creating in-memory SourcePackageDeps")
  700. # Keeps a list of SourcePackageDeps instances which are to be bulk
  701. # created in the end.
  702. dependency_instances = \
  703. self._process_source_to_binary_deps(source_to_binary_deps,
  704. all_sources, bin_to_src,
  705. default_repository)
  706. # Create all the model instances in one transaction
  707. self.log("Committing SourcePackagesDeps to database")
  708. SourcePackageDeps.objects.all().delete()
  709. SourcePackageDeps.objects.bulk_create(dependency_instances)
  @clear_all_events_on_exception
  def execute(self):
      """
      Refreshes the apt cache and then updates, in this order, the data
      coming from the Sources files, the data coming from the Packages
      files and the package dependencies.
      """
      self.log("Updating apt's cache")
      self.apt_cache = AptCache()
      # The cache update returns a pair: the updated Sources files followed
      # by the updated Packages files.
      update_result = self.apt_cache.update_repositories(self.force_update)
      updated_sources, updated_packages = update_result

      self.log("Updating data from Sources files")
      self.update_sources_files(updated_sources)

      self.log("Updating data from Packages files")
      self.update_packages_files(updated_packages)

      self.log("Updating dependencies")
      self.update_dependencies()
  class UpdatePackageGeneralInformation(PackageUpdateTask):
      """
      Task storing the ``general`` extracted information of source packages.
      """
      DEPENDS_ON_EVENTS = (
          'new-source-package-version-in-repository',
          'lost-source-package-version-in-repository',
      )

      def __init__(self, *args, **kwargs):
          super(UpdatePackageGeneralInformation, self).__init__(*args, **kwargs)
          self.packages = set()

      def process_event(self, event):
          """
          Remembers the name of the package the given event is about.
          """
          name = event.arguments['name']
          self.packages.add(name)

      def _get_info_from_entry(self, entry):
          """
          Returns a dictionary with the general information taken from the
          given entry and from the source package it refers to.
          """
          source_package = entry.source_package
          return {
              'name': source_package.name,
              'priority': entry.priority,
              'section': entry.section,
              'version': entry.source_package.version,
              'maintainer': source_package.maintainer.to_dict(),
              'uploaders': [
                  contributor.to_dict()
                  for contributor in source_package.uploaders.all()
              ],
              'architectures': [
                  str(architecture)
                  for architecture
                  in source_package.architectures.order_by('name')
              ],
              'standards_version': source_package.standards_version,
              'vcs': source_package.vcs,
          }

      @clear_all_events_on_exception
      def execute(self):
          """
          Stores the general information of every package (initial run) or
          of the packages named by the received events. Packages whose
          ``main_entry`` is ``None`` are left untouched.
          """
          names = {
              event.arguments['name']
              for event in self.get_all_events()
          }
          with transaction.atomic():
              if not self.is_initial_task():
                  self.log("Updating general infos of %d packages",
                           len(names))
                  packages = SourcePackageName.objects.filter(name__in=names)
              else:
                  self.log("Updating general infos of all packages")
                  packages = SourcePackageName.objects.all()

              for package in packages:
                  main_entry = package.main_entry
                  if main_entry is not None:
                      info, _ = PackageExtractedInfo.objects.get_or_create(
                          key='general',
                          package=package
                      )
                      info.value = self._get_info_from_entry(main_entry)
                      info.save()
  class UpdateVersionInformation(PackageUpdateTask):
      """
      Updates extracted version information about packages.
      """
      DEPENDS_ON_EVENTS = (
          'new-source-package-version-in-repository',
          'lost-source-package-version-in-repository',
      )

      def __init__(self, *args, **kwargs):
          super(UpdateVersionInformation, self).__init__(*args, **kwargs)
          self.packages = set()

      def process_event(self, event):
          """
          Records the name of the package the given event refers to.
          """
          self.packages.add(event.arguments['name'])

      def _extract_versions_for_package(self, package_name):
          """
          Returns a dictionary with the following keys:

          - ``version_list``: a list with one dictionary for each repository
            of the package which is not flagged as hidden. Each of them has
            a ``repository`` key (a dictionary with the ``name``,
            ``shorthand``, ``codename``, ``suite`` and ``id`` of the
            repository) and a ``version`` key.
          - ``default_pool_url``: the ``directory_url`` of the package's
            main entry, or ``None`` when the package has no main entry.
          """
          version_list = []
          for repository in package_name.repositories:
              if repository.get_flags()['hidden']:
                  continue
              entry = repository.get_source_package_entry(package_name)
              version_list.append({
                  'repository': {
                      'name': entry.repository.name,
                      'shorthand': entry.repository.shorthand,
                      'codename': entry.repository.codename,
                      'suite': entry.repository.suite,
                      'id': entry.repository.id,
                  },
                  'version': entry.source_package.version,
              })

          # A package may have no main entry; one such package must not
          # abort the update of all the others with an AttributeError.
          main_entry = package_name.main_entry
          default_pool_url = None
          if main_entry is not None:
              default_pool_url = main_entry.directory_url

          versions = {
              'version_list': version_list,
              'default_pool_url': default_pool_url,
          }

          return versions

      @clear_all_events_on_exception
      def execute(self):
          """
          Stores the ``versions`` extracted information of every package
          (initial run) or of the packages named by the received events.
          """
          package_names = set(
              event.arguments['name']
              for event in self.get_all_events()
          )
          with transaction.atomic():
              if self.is_initial_task():
                  self.log("Updating versions tables of all packages")
                  qs = SourcePackageName.objects.all()
              else:
                  self.log("Updating versions tables of %d packages",
                           len(package_names))
                  qs = SourcePackageName.objects.filter(name__in=package_names)

              for package in qs:
                  versions, _ = PackageExtractedInfo.objects.get_or_create(
                      key='versions',
                      package=package)
                  versions.value = self._extract_versions_for_package(package)
                  versions.save()
  class UpdateSourceToBinariesInformation(PackageUpdateTask):
      """
      Updates extracted source-binary mapping for packages.
      These are the binary packages which appear in the binary panel on each
      source package's Web page.
      """
      DEPENDS_ON_EVENTS = (
          'new-source-package-version-in-repository',
          'lost-source-package-version-in-repository',
      )

      def __init__(self, *args, **kwargs):
          super(UpdateSourceToBinariesInformation, self).__init__(*args, **kwargs)
          self.packages = set()

      def process_event(self, event):
          """
          Records the name of the package the given event refers to.
          """
          self.packages.add(event.arguments['name'])

      def _get_all_binaries(self, package):
          """
          Returns a list representing binary packages linked to the given
          source package.

          Each element is a dictionary with the ``name`` of the binary
          package and a ``repository`` dictionary (``name``, ``shorthand``,
          ``suite``, ``codename``, ``id``) describing the repository of the
          package's main entry. The list is empty when the package has no
          main entry.
          """
          main_entry = package.main_entry
          if main_entry is None:
              # Without a main entry there is no repository to report; an
              # empty list is returned instead of failing with an
              # AttributeError which would abort the whole update.
              return []

          repository = main_entry.repository
          return [
              {
                  'name': pkg.name,
                  'repository': {
                      'name': repository.name,
                      'shorthand': repository.shorthand,
                      'suite': repository.suite,
                      'codename': repository.codename,
                      'id': repository.id,
                  },
              }
              for pkg in package.main_version.binary_packages.all()
          ]

      @clear_all_events_on_exception
      def execute(self):
          """
          Stores the ``binaries`` extracted information of every package
          (initial run) or of the packages named by the received events.
          """
          package_names = set(
              event.arguments['name']
              for event in self.get_all_events()
          )
          with transaction.atomic():
              if self.is_initial_task():
                  qs = SourcePackageName.objects.all()
              else:
                  qs = SourcePackageName.objects.filter(name__in=package_names)

              for package in qs:
                  binaries, _ = PackageExtractedInfo.objects.get_or_create(
                      key='binaries',
                      package=package)
                  binaries.value = self._get_all_binaries(package)
                  binaries.save()
  class UpdateTeamPackagesTask(BaseTask):
      """
      Adds the source packages which received a new version in the default
      repository to the teams whose maintainer email is the email of the
      package's maintainer or of one of its uploaders.
      """
      DEPENDS_ON_EVENTS = (
          'new-source-package-version-in-repository',
      )

      def add_package_to_maintainer_teams(self, package, maintainer):
          """
          Adds the given package to every team having the email of the given
          maintainer set as its maintainer email.

          :param package: The package to add to the maintainer's teams.
          :type package: :class:`SourcePackageName
              <distro_tracker.core.models.SourcePackageName>`
          :param maintainer: The contributor whose teams should receive the
              package; only its ``email`` attribute is used.
          """
          matching_teams = Team.objects.filter(
              maintainer_email__email=maintainer.email)
          for matching_team in matching_teams:
              matching_team.packages.add(package)

      def execute(self):
          """
          Processes the received events which concern the default
          repository. Nothing is done when there is no default repository.
          """
          try:
              default_repository = Repository.objects.get(default=True)
          except Repository.DoesNotExist:
              return

          # Maps each package name reported for the default repository to
          # the version given by its event.
          new_versions = {
              event.arguments['name']: event.arguments['version']
              for event in self.get_all_events()
              if event.arguments['repository'] == default_repository.name
          }

          candidates = SourcePackage.objects.filter(
              repository_entries__repository=default_repository,
              source_package_name__name__in=new_versions.keys()
          ).select_related()

          for candidate in candidates:
              name = candidate.name
              # Only the version announced by the event is of interest.
              if candidate.version == new_versions[name]:
                  package = candidate.source_package_name
                  # The maintainer's teams come first, then the teams of
                  # every uploader.
                  self.add_package_to_maintainer_teams(
                      package, candidate.maintainer)
                  for uploader in candidate.uploaders.all():
                      self.add_package_to_maintainer_teams(package, uploader)