cruft.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. """
  2. helper functions for cruft-report
  3. @contact: Debian FTPMaster <ftpmaster@debian.org>
  4. @copyright 2011 Torsten Werner <twerner@debian.org>
  5. """
  6. # This program is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 2 of the License, or
  9. # (at your option) any later version.
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. ################################################################################
  18. from __future__ import print_function
  19. from daklib.dbconn import *
  20. from sqlalchemy import func
  21. from sqlalchemy.orm import object_session, aliased
  22. def newer_version(lowersuite_name, highersuite_name, session, include_equal=False):
  23. '''
  24. Finds newer versions in lowersuite_name than in highersuite_name. Returns a
  25. list of tuples (source, higherversion, lowerversion) where higherversion is
  26. the newest version from highersuite_name and lowerversion is the newest
  27. version from lowersuite_name.
  28. '''
  29. lowersuite = get_suite(lowersuite_name, session)
  30. highersuite = get_suite(highersuite_name, session)
  31. def get_suite_sources(suite):
  32. q1 = session.query(DBSource.source,
  33. func.max(DBSource.version).label('version')). \
  34. with_parent(suite). \
  35. group_by(DBSource.source). \
  36. subquery()
  37. return aliased(q1)
  38. def get_suite_binaries(suite):
  39. q1 = session.query(DBBinary.package,
  40. DBSource.source,
  41. func.max(DBSource.version).label('version'),
  42. Architecture.arch_string,
  43. func.max(DBBinary.version).label('binversion')). \
  44. join(DBSource). \
  45. with_parent(suite). \
  46. join(Architecture). \
  47. group_by(
  48. DBBinary.package,
  49. DBSource.source,
  50. Architecture.arch_string,
  51. ). \
  52. subquery()
  53. return aliased(q1)
  54. highq = get_suite_sources(highersuite)
  55. lowq = get_suite_sources(lowersuite)
  56. query = session.query(
  57. highq.c.source,
  58. highq.c.version.label('higherversion'),
  59. lowq.c.version.label('lowerversion')
  60. ). \
  61. join(lowq, highq.c.source == lowq.c.source)
  62. if include_equal:
  63. query = query.filter(highq.c.version <= lowq.c.version)
  64. else:
  65. query = query.filter(highq.c.version < lowq.c.version)
  66. list = []
  67. # get all sources that have a higher version in lowersuite than in
  68. # highersuite
  69. for (source, higherversion, lowerversion) in query:
  70. q1 = session.query(DBBinary.package,
  71. DBSource.source,
  72. DBSource.version,
  73. Architecture.arch_string
  74. ). \
  75. join(DBSource). \
  76. with_parent(highersuite). \
  77. join(Architecture). \
  78. filter(DBSource.source == source). \
  79. subquery()
  80. q2 = session.query(q1.c.arch_string).group_by(q1.c.arch_string)
  81. # all architectures for which source has binaries in highersuite
  82. archs_high = set(x[0] for x in q2.all())
  83. highq = get_suite_binaries(highersuite)
  84. lowq = get_suite_binaries(lowersuite)
  85. query = session.query(highq.c.arch_string). \
  86. join(lowq, highq.c.source == lowq.c.source). \
  87. filter(highq.c.arch_string == lowq.c.arch_string). \
  88. filter(highq.c.package == lowq.c.package). \
  89. filter(highq.c.source == source)
  90. if include_equal:
  91. query = query. \
  92. filter(highq.c.binversion <= lowq.c.binversion). \
  93. filter(highq.c.version <= lowq.c.version)
  94. else:
  95. query = query. \
  96. filter(highq.c.binversion < lowq.c.binversion). \
  97. filter(highq.c.version < lowq.c.version)
  98. query = query.group_by(highq.c.arch_string)
  99. # all architectures for which source has a newer binary in lowersuite
  100. archs_newer = set(x[0] for x in query.all())
  101. # if has at least one binary in lowersuite which is newer than the one
  102. # in highersuite on each architecture for which source has binaries in
  103. # highersuite, we know that the builds for all relevant architecture
  104. # are done, so we can remove the old source with it's binaries
  105. if (archs_newer >= archs_high):
  106. list.append((source, higherversion, lowerversion))
  107. list.sort()
  108. return list
  109. def get_package_names(suite):
  110. '''
  111. Returns a query that selects all distinct package names from suite ordered
  112. by package name.
  113. '''
  114. session = object_session(suite)
  115. return session.query(DBBinary.package).with_parent(suite). \
  116. group_by(DBBinary.package).order_by(DBBinary.package)
  117. class NamedSource(object):
  118. '''
  119. A source package identified by its name with all of its versions in a
  120. suite.
  121. '''
  122. def __init__(self, suite, source):
  123. self.source = source
  124. query = suite.sources.filter_by(source=source). \
  125. order_by(DBSource.version)
  126. self.versions = [src.version for src in query]
  127. def __str__(self):
  128. return "%s(%s)" % (self.source, ", ".join(self.versions))
  129. class DejavuBinary(object):
  130. '''
  131. A binary package identified by its name which gets built by multiple source
  132. packages in a suite. The architecture is ignored which leads to the
  133. following corner case, e.g.:
  134. If a source package 'foo-mips' that builds a binary package 'foo' on mips
  135. and another source package 'foo-mipsel' builds a binary package with the
  136. same name 'foo' on mipsel then the binary package 'foo' will be reported as
  137. built from multiple source packages.
  138. '''
  139. def __init__(self, suite, package):
  140. self.package = package
  141. session = object_session(suite)
  142. # We need a subquery to make sure that both binary and source packages
  143. # are in the right suite.
  144. bin_query = suite.binaries.filter_by(package=package).subquery()
  145. src_query = session.query(DBSource.source).with_parent(suite). \
  146. join(bin_query).order_by(DBSource.source).group_by(DBSource.source)
  147. self.sources = []
  148. if src_query.count() > 1:
  149. for source, in src_query:
  150. self.sources.append(str(NamedSource(suite, source)))
  151. def has_multiple_sources(self):
  152. 'Has the package been built by multiple sources?'
  153. return len(self.sources) > 1
  154. def __str__(self):
  155. return "%s built by: %s" % (self.package, ", ".join(self.sources))
  156. def report_multiple_source(suite):
  157. '''
  158. Reports binary packages built from multiple source package with different
  159. names.
  160. '''
  161. print("Built from multiple source packages")
  162. print("-----------------------------------")
  163. print()
  164. for package, in get_package_names(suite):
  165. binary = DejavuBinary(suite, package)
  166. if binary.has_multiple_sources():
  167. print(binary)
  168. print()
  169. def query_without_source(suite_id, session):
  170. """searches for arch: all packages from suite that do no longer
  171. reference a source package in the same suite
  172. subquery unique_binaries: selects all packages with only 1 version
  173. in suite since 'dak rm' does not allow to specify version numbers"""
  174. query = """
  175. with unique_binaries as
  176. (select package, max(version) as version, max(source) as source
  177. from bin_associations_binaries
  178. where architecture = 2 and suite = :suite_id
  179. group by package having count(*) = 1)
  180. select ub.package, ub.version
  181. from unique_binaries ub
  182. left join src_associations_src sas
  183. on ub.source = sas.src and sas.suite = :suite_id
  184. where sas.id is null
  185. order by ub.package"""
  186. return session.execute(query, {'suite_id': suite_id})
  187. def queryNBS(suite_id, session):
  188. """This one is really complex. It searches arch != all packages that
  189. are no longer built from current source packages in suite.
  190. temp table unique_binaries: will be populated with packages that
  191. have only one version in suite because 'dak rm' does not allow
  192. specifying version numbers
  193. temp table newest_binaries: will be populated with packages that are
  194. built from current sources
  195. subquery uptodate_arch: returns all architectures built from current
  196. sources
  197. subquery unique_binaries_uptodate_arch: returns all packages in
  198. architectures from uptodate_arch
  199. subquery unique_binaries_uptodate_arch_agg: same as
  200. unique_binaries_uptodate_arch but with column architecture
  201. aggregated to array
  202. subquery uptodate_packages: similar to uptodate_arch but returns all
  203. packages built from current sources
  204. subquery outdated_packages: returns all packages with architectures
  205. no longer built from current source
  206. """
  207. query = """
  208. with
  209. unique_binaries as
  210. (select
  211. bab.package,
  212. bab.architecture,
  213. max(bab.source) as source
  214. from bin_associations_binaries bab
  215. where bab.suite = :suite_id and bab.architecture > 2
  216. group by package, architecture having count(*) = 1),
  217. newest_binaries as
  218. (select ub.package, ub.architecture, nsa.source, nsa.version
  219. from unique_binaries ub
  220. join newest_src_association nsa
  221. on ub.source = nsa.src and nsa.suite = :suite_id),
  222. uptodate_arch as
  223. (select architecture, source, version
  224. from newest_binaries
  225. group by architecture, source, version),
  226. unique_binaries_uptodate_arch as
  227. (select ub.package, ub.architecture, ua.source, ua.version
  228. from unique_binaries ub
  229. join source s
  230. on ub.source = s.id
  231. join uptodate_arch ua
  232. on ub.architecture = ua.architecture and s.source = ua.source),
  233. unique_binaries_uptodate_arch_agg as
  234. (select ubua.package,
  235. array(select unnest(array_agg(a.arch_string)) order by 1) as arch_list,
  236. ubua.source, ubua.version
  237. from unique_binaries_uptodate_arch ubua
  238. join architecture a
  239. on ubua.architecture = a.id
  240. group by ubua.source, ubua.version, ubua.package),
  241. uptodate_packages as
  242. (select package, source, version
  243. from newest_binaries
  244. group by package, source, version),
  245. outdated_packages as
  246. (select array(select unnest(array_agg(package)) order by 1) as pkg_list,
  247. arch_list, source, version
  248. from unique_binaries_uptodate_arch_agg
  249. where package not in
  250. (select package from uptodate_packages)
  251. group by arch_list, source, version)
  252. select * from outdated_packages order by source"""
  253. return session.execute(query, {'suite_id': suite_id})
  254. def queryNBS_metadata(suite_id, session):
  255. """searches for NBS packages based on metadata extraction of the
  256. newest source for a given suite"""
  257. query = """
  258. select string_agg(bin.package, ' ' order by bin.package), (
  259. select arch_string
  260. from architecture
  261. where id = bin.architecture) as architecture, src.source, newsrc.version
  262. from bin_associations_binaries bin
  263. join src_associations_src src
  264. on src.src = bin.source
  265. and src.suite = bin.suite
  266. join newest_src_association newsrc
  267. on newsrc.source = src.source
  268. and newsrc.version != src.version
  269. and newsrc.suite = bin.suite
  270. where bin.suite = :suite_id
  271. and bin.package not in (
  272. select trim(' \n' from unnest(string_to_array(meta.value, ',')))
  273. from source_metadata meta
  274. where meta.src_id = (
  275. select newsrc.src
  276. from newest_src_association newsrc
  277. where newsrc.source = (
  278. select s.source
  279. from source s
  280. where s.id = bin.source)
  281. and newsrc.suite = bin.suite)
  282. and key_id = (
  283. select key_id
  284. from metadata_keys
  285. where key = 'Binary'))
  286. group by src.source, newsrc.version, architecture
  287. order by src.source, newsrc.version, bin.architecture"""
  288. return session.execute(query, {'suite_id': suite_id})