contents.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497
  1. #!/usr/bin/env python
  2. """
  3. Helper code for contents generation.
  4. @contact: Debian FTPMaster <ftpmaster@debian.org>
  5. @copyright: 2011 Torsten Werner <twerner@debian.org>
  6. @license: GNU General Public License version 2 or later
  7. """
  8. ################################################################################
  9. # This program is free software; you can redistribute it and/or modify
  10. # it under the terms of the GNU General Public License as published by
  11. # the Free Software Foundation; either version 2 of the License, or
  12. # (at your option) any later version.
  13. # This program is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. # GNU General Public License for more details.
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program; if not, write to the Free Software
  19. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. ################################################################################
  21. from daklib.dbconn import *
  22. from daklib.config import Config
  23. from daklib.filewriter import BinaryContentsFileWriter, SourceContentsFileWriter
  24. from multiprocessing import Pool
  25. from shutil import rmtree
  26. from tempfile import mkdtemp
  27. import daklib.daksubprocess
  28. import os.path
  29. class BinaryContentsWriter(object):
  30. '''
  31. BinaryContentsWriter writes the Contents-$arch.gz files.
  32. '''
  33. def __init__(self, suite, architecture, overridetype, component):
  34. self.suite = suite
  35. self.architecture = architecture
  36. self.overridetype = overridetype
  37. self.component = component
  38. self.session = suite.session()
  39. def query(self):
  40. '''
  41. Returns a query object that is doing most of the work.
  42. '''
  43. overridesuite = self.suite
  44. if self.suite.overridesuite is not None:
  45. overridesuite = get_suite(self.suite.overridesuite, self.session)
  46. params = {
  47. 'suite': self.suite.suite_id,
  48. 'overridesuite': overridesuite.suite_id,
  49. 'component': self.component.component_id,
  50. 'arch_all': get_architecture('all', self.session).arch_id,
  51. 'arch': self.architecture.arch_id,
  52. 'type_id': self.overridetype.overridetype_id,
  53. 'type': self.overridetype.overridetype,
  54. }
  55. sql_create_temp = '''
  56. create temp table newest_binaries (
  57. id integer primary key,
  58. package text);
  59. create index newest_binaries_by_package on newest_binaries (package);
  60. insert into newest_binaries (id, package)
  61. select distinct on (package) id, package from binaries
  62. where type = :type and
  63. (architecture = :arch_all or architecture = :arch) and
  64. id in (select bin from bin_associations where suite = :suite)
  65. order by package, version desc;'''
  66. self.session.execute(sql_create_temp, params=params)
  67. sql = '''
  68. with
  69. unique_override as
  70. (select o.package, s.section
  71. from override o, section s
  72. where o.suite = :overridesuite and o.type = :type_id and o.section = s.id and
  73. o.component = :component)
  74. select bc.file, string_agg(o.section || '/' || b.package, ',' order by b.package) as pkglist
  75. from newest_binaries b, bin_contents bc, unique_override o
  76. where b.id = bc.binary_id and o.package = b.package
  77. group by bc.file'''
  78. return self.session.query("file", "pkglist").from_statement(sql). \
  79. params(params)
  80. def formatline(self, filename, package_list):
  81. '''
  82. Returns a formatted string for the filename argument.
  83. '''
  84. return "%-55s %s\n" % (filename, package_list)
  85. def fetch(self):
  86. '''
  87. Yields a new line of the Contents-$arch.gz file in filename order.
  88. '''
  89. for filename, package_list in self.query().yield_per(100):
  90. yield self.formatline(filename, package_list)
  91. # end transaction to return connection to pool
  92. self.session.rollback()
  93. def get_list(self):
  94. '''
  95. Returns a list of lines for the Contents-$arch.gz file.
  96. '''
  97. return [item for item in self.fetch()]
  98. def writer(self):
  99. '''
  100. Returns a writer object.
  101. '''
  102. values = {
  103. 'archive': self.suite.archive.path,
  104. 'suite': self.suite.suite_name,
  105. 'component': self.component.component_name,
  106. 'debtype': self.overridetype.overridetype,
  107. 'architecture': self.architecture.arch_string,
  108. }
  109. return BinaryContentsFileWriter(**values)
  110. def get_header(self):
  111. '''
  112. Returns the header for the Contents files as a string.
  113. '''
  114. filename = os.path.join(Config()['Dir::Templates'], 'contents')
  115. with open(filename) as header_file:
  116. return header_file.read()
  117. def write_file(self):
  118. '''
  119. Write the output file.
  120. '''
  121. writer = self.writer()
  122. file = writer.open()
  123. file.write(self.get_header())
  124. for item in self.fetch():
  125. file.write(item)
  126. writer.close()
  127. class SourceContentsWriter(object):
  128. '''
  129. SourceContentsWriter writes the Contents-source.gz files.
  130. '''
  131. def __init__(self, suite, component):
  132. self.suite = suite
  133. self.component = component
  134. self.session = suite.session()
  135. def query(self):
  136. '''
  137. Returns a query object that is doing most of the work.
  138. '''
  139. params = {
  140. 'suite_id': self.suite.suite_id,
  141. 'component_id': self.component.component_id,
  142. }
  143. sql_create_temp = '''
  144. create temp table newest_sources (
  145. id integer primary key,
  146. source text);
  147. create index sources_binaries_by_source on newest_sources (source);
  148. insert into newest_sources (id, source)
  149. select distinct on (source) s.id, s.source from source s
  150. join files_archive_map af on s.file = af.file_id
  151. where s.id in (select source from src_associations where suite = :suite_id)
  152. and af.component_id = :component_id
  153. order by source, version desc;'''
  154. self.session.execute(sql_create_temp, params=params)
  155. sql = '''
  156. select sc.file, string_agg(s.source, ',' order by s.source) as pkglist
  157. from newest_sources s, src_contents sc
  158. where s.id = sc.source_id group by sc.file'''
  159. return self.session.query("file", "pkglist").from_statement(sql). \
  160. params(params)
  161. def formatline(self, filename, package_list):
  162. '''
  163. Returns a formatted string for the filename argument.
  164. '''
  165. return "%s\t%s\n" % (filename, package_list)
  166. def fetch(self):
  167. '''
  168. Yields a new line of the Contents-source.gz file in filename order.
  169. '''
  170. for filename, package_list in self.query().yield_per(100):
  171. yield self.formatline(filename, package_list)
  172. # end transaction to return connection to pool
  173. self.session.rollback()
  174. def get_list(self):
  175. '''
  176. Returns a list of lines for the Contents-source.gz file.
  177. '''
  178. return [item for item in self.fetch()]
  179. def writer(self):
  180. '''
  181. Returns a writer object.
  182. '''
  183. values = {
  184. 'archive': self.suite.archive.path,
  185. 'suite': self.suite.suite_name,
  186. 'component': self.component.component_name
  187. }
  188. return SourceContentsFileWriter(**values)
  189. def write_file(self):
  190. '''
  191. Write the output file.
  192. '''
  193. writer = self.writer()
  194. file = writer.open()
  195. for item in self.fetch():
  196. file.write(item)
  197. writer.close()
  198. def binary_helper(suite_id, arch_id, overridetype_id, component_id):
  199. '''
  200. This function is called in a new subprocess and multiprocessing wants a top
  201. level function.
  202. '''
  203. session = DBConn().session(work_mem = 1000)
  204. suite = Suite.get(suite_id, session)
  205. architecture = Architecture.get(arch_id, session)
  206. overridetype = OverrideType.get(overridetype_id, session)
  207. component = Component.get(component_id, session)
  208. log_message = [suite.suite_name, architecture.arch_string, \
  209. overridetype.overridetype, component.component_name]
  210. contents_writer = BinaryContentsWriter(suite, architecture, overridetype, component)
  211. contents_writer.write_file()
  212. session.close()
  213. return log_message
  214. def source_helper(suite_id, component_id):
  215. '''
  216. This function is called in a new subprocess and multiprocessing wants a top
  217. level function.
  218. '''
  219. session = DBConn().session(work_mem = 1000)
  220. suite = Suite.get(suite_id, session)
  221. component = Component.get(component_id, session)
  222. log_message = [suite.suite_name, 'source', component.component_name]
  223. contents_writer = SourceContentsWriter(suite, component)
  224. contents_writer.write_file()
  225. session.close()
  226. return log_message
  227. class ContentsWriter(object):
  228. '''
  229. Loop over all suites, architectures, overridetypes, and components to write
  230. all contents files.
  231. '''
  232. @classmethod
  233. def log_result(class_, result):
  234. '''
  235. Writes a result message to the logfile.
  236. '''
  237. class_.logger.log(result)
  238. @classmethod
  239. def write_all(class_, logger, archive_names = [], suite_names = [], component_names = [], force = False):
  240. '''
  241. Writes all Contents files for suites in list suite_names which defaults
  242. to all 'touchable' suites if not specified explicitely. Untouchable
  243. suites will be included if the force argument is set to True.
  244. '''
  245. class_.logger = logger
  246. session = DBConn().session()
  247. suite_query = session.query(Suite)
  248. if len(archive_names) > 0:
  249. suite_query = suite_query.join(Suite.archive).filter(Archive.archive_name.in_(archive_names))
  250. if len(suite_names) > 0:
  251. suite_query = suite_query.filter(Suite.suite_name.in_(suite_names))
  252. component_query = session.query(Component)
  253. if len(component_names) > 0:
  254. component_query = component_query.filter(Component.component_name.in_(component_names))
  255. if not force:
  256. suite_query = suite_query.filter(Suite.untouchable == False)
  257. deb_id = get_override_type('deb', session).overridetype_id
  258. udeb_id = get_override_type('udeb', session).overridetype_id
  259. pool = Pool()
  260. for suite in suite_query:
  261. suite_id = suite.suite_id
  262. for component in component_query:
  263. component_id = component.component_id
  264. # handle source packages
  265. pool.apply_async(source_helper, (suite_id, component_id),
  266. callback = class_.log_result)
  267. for architecture in suite.get_architectures(skipsrc = True, skipall = True):
  268. arch_id = architecture.arch_id
  269. # handle 'deb' packages
  270. pool.apply_async(binary_helper, (suite_id, arch_id, deb_id, component_id), \
  271. callback = class_.log_result)
  272. # handle 'udeb' packages
  273. pool.apply_async(binary_helper, (suite_id, arch_id, udeb_id, component_id), \
  274. callback = class_.log_result)
  275. pool.close()
  276. pool.join()
  277. session.close()
  278. class BinaryContentsScanner(object):
  279. '''
  280. BinaryContentsScanner provides a threadsafe method scan() to scan the
  281. contents of a DBBinary object.
  282. '''
  283. def __init__(self, binary_id):
  284. '''
  285. The argument binary_id is the id of the DBBinary object that
  286. should be scanned.
  287. '''
  288. self.binary_id = binary_id
  289. def scan(self, dummy_arg = None):
  290. '''
  291. This method does the actual scan and fills in the associated BinContents
  292. property. It commits any changes to the database. The argument dummy_arg
  293. is ignored but needed by our threadpool implementation.
  294. '''
  295. session = DBConn().session()
  296. binary = session.query(DBBinary).get(self.binary_id)
  297. fileset = set(binary.scan_contents())
  298. if len(fileset) == 0:
  299. fileset.add('EMPTY_PACKAGE')
  300. for filename in fileset:
  301. binary.contents.append(BinContents(file = filename))
  302. session.commit()
  303. session.close()
  304. @classmethod
  305. def scan_all(class_, limit = None):
  306. '''
  307. The class method scan_all() scans all binaries using multiple threads.
  308. The number of binaries to be scanned can be limited with the limit
  309. argument. Returns the number of processed and remaining packages as a
  310. dict.
  311. '''
  312. session = DBConn().session()
  313. query = session.query(DBBinary).filter(DBBinary.contents == None)
  314. remaining = query.count
  315. if limit is not None:
  316. query = query.limit(limit)
  317. processed = query.count()
  318. pool = Pool()
  319. for binary in query.yield_per(100):
  320. pool.apply_async(binary_scan_helper, (binary.binary_id, ))
  321. pool.close()
  322. pool.join()
  323. remaining = remaining()
  324. session.close()
  325. return { 'processed': processed, 'remaining': remaining }
  326. def binary_scan_helper(binary_id):
  327. '''
  328. This function runs in a subprocess.
  329. '''
  330. scanner = BinaryContentsScanner(binary_id)
  331. scanner.scan()
  332. class UnpackedSource(object):
  333. '''
  334. UnpackedSource extracts a source package into a temporary location and
  335. gives you some convinient function for accessing it.
  336. '''
  337. def __init__(self, dscfilename, tmpbasedir=None):
  338. '''
  339. The dscfilename is a name of a DSC file that will be extracted.
  340. '''
  341. basedir = tmpbasedir if tmpbasedir else Config()['Dir::TempPath']
  342. temp_directory = mkdtemp(dir = basedir)
  343. self.root_directory = os.path.join(temp_directory, 'root')
  344. command = ('dpkg-source', '--no-copy', '--no-check', '-q', '-x',
  345. dscfilename, self.root_directory)
  346. daklib.daksubprocess.check_call(command)
  347. def get_root_directory(self):
  348. '''
  349. Returns the name of the package's root directory which is the directory
  350. where the debian subdirectory is located.
  351. '''
  352. return self.root_directory
  353. def get_changelog_file(self):
  354. '''
  355. Returns a file object for debian/changelog or None if no such file exists.
  356. '''
  357. changelog_name = os.path.join(self.root_directory, 'debian', 'changelog')
  358. try:
  359. return open(changelog_name)
  360. except IOError:
  361. return None
  362. def get_all_filenames(self):
  363. '''
  364. Returns an iterator over all filenames. The filenames will be relative
  365. to the root directory.
  366. '''
  367. skip = len(self.root_directory) + 1
  368. for root, _, files in os.walk(self.root_directory):
  369. for name in files:
  370. yield os.path.join(root[skip:], name)
  371. def cleanup(self):
  372. '''
  373. Removes all temporary files.
  374. '''
  375. if self.root_directory is None:
  376. return
  377. parent_directory = os.path.dirname(self.root_directory)
  378. rmtree(parent_directory)
  379. self.root_directory = None
  380. def __del__(self):
  381. '''
  382. Enforce cleanup.
  383. '''
  384. self.cleanup()
  385. class SourceContentsScanner(object):
  386. '''
  387. SourceContentsScanner provides a method scan() to scan the contents of a
  388. DBSource object.
  389. '''
  390. def __init__(self, source_id):
  391. '''
  392. The argument source_id is the id of the DBSource object that
  393. should be scanned.
  394. '''
  395. self.source_id = source_id
  396. def scan(self):
  397. '''
  398. This method does the actual scan and fills in the associated SrcContents
  399. property. It commits any changes to the database.
  400. '''
  401. session = DBConn().session()
  402. source = session.query(DBSource).get(self.source_id)
  403. fileset = set(source.scan_contents())
  404. for filename in fileset:
  405. source.contents.append(SrcContents(file = filename))
  406. session.commit()
  407. session.close()
  408. @classmethod
  409. def scan_all(class_, limit = None):
  410. '''
  411. The class method scan_all() scans all source using multiple processes.
  412. The number of sources to be scanned can be limited with the limit
  413. argument. Returns the number of processed and remaining packages as a
  414. dict.
  415. '''
  416. session = DBConn().session()
  417. query = session.query(DBSource).filter(DBSource.contents == None)
  418. remaining = query.count
  419. if limit is not None:
  420. query = query.limit(limit)
  421. processed = query.count()
  422. pool = Pool()
  423. for source in query.yield_per(100):
  424. pool.apply_async(source_scan_helper, (source.source_id, ))
  425. pool.close()
  426. pool.join()
  427. remaining = remaining()
  428. session.close()
  429. return { 'processed': processed, 'remaining': remaining }
  430. def source_scan_helper(source_id):
  431. '''
  432. This function runs in a subprocess.
  433. '''
  434. try:
  435. scanner = SourceContentsScanner(source_id)
  436. scanner.scan()
  437. except Exception as e:
  438. print e