rpmwatcher_format_reports.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. #!/bin/env python
  2. """
  3. Formats reports about our RPMs
  4. Prerequisites:
  5. - python-markdown
  6. - a /data directory that contains the workdir dir updated by rpmwatcher_extract_roles.py
  7. Note, in what follows:
  8. - NVR means Name Version Release
  9. - NVRA means Name Version Release Arch
  10. Those are common concepts in the RPM world.
  11. """
  12. from __future__ import print_function
  13. import argparse
  14. import os
  15. import json
  16. import codecs
  17. import StringIO
  18. import markdown
  19. import rpm
  20. import urllib
  21. KOJI_URL = "https://koji.xcp-ng.org"
  22. KOJI_BUILD_URL = KOJI_URL + "/search?match=exact&type=build&terms=%s"
  23. # Unless you know an exact *binary* RPM name, there's no better URL than a simple search on the name, without the version
  24. # They don't display information pages for SRPMs, unfortunately...
  25. # Also: this URL does only exact name matches, whereas if you use their search field there's
  26. # an implicit wildcard... But the URL is the same... Not-significant URLs...
  27. CENTOS_RPM_URL = "https://pkgs.org/download/%s"
  28. # I could use Fedora's koji for a more direct URL, but it is slow to respond to search queries
  29. # and I don't want to put too much burden on it (if for example a crawler tries every URL)
  30. EPEL_RPM_URL = "https://pkgs.org/download/%s"
  31. def check_dir(dirpath):
  32. if not os.path.isdir(dirpath):
  33. raise Exception("Directory %s doesn't exist" % dirpath)
  34. return dirpath
  35. def format_role(xcp_builds, xcp_rpms, role, data, max_entries=None):
  36. if role in ['main', 'extra']:
  37. # short RPM names
  38. details = [xcp_rpms[rpm_nvra]['name'] for rpm_nvra in data]
  39. label_for = ""
  40. elif role in ['extra_dep', 'other_dep']:
  41. # short RPM names
  42. details = [xcp_rpms[rpm_nvra]['name'] for rpm_nvra in data]
  43. label_for = "for "
  44. else:
  45. # short SRPM names
  46. details = []
  47. for srpm_nvr in data:
  48. details.append(xcp_builds[srpm_nvr]['name'])
  49. label_for = "for "
  50. if max_entries is not None:
  51. if len(details) > max_entries:
  52. details = details[:max_entries] + ['...']
  53. return "%s (%s%s)" % (role, label_for, ' '.join(details))
  54. def js_color_cell_values(values=[], color='black'):
  55. return """
  56. if (%s.indexOf(cells[i].innerHTML) >=0 ){
  57. cells[i].style.color = '%s';
  58. }
  59. """ % (json.dumps(values), color)
  60. # From https://docs.python.org/3/howto/sorting.html
  61. def cmp_to_key(mycmp):
  62. 'Convert a cmp= function into a key= function'
  63. class K(object):
  64. def __init__(self, obj, *args):
  65. self.obj = obj
  66. def __lt__(self, other):
  67. return mycmp(self.obj, other.obj) < 0
  68. def __gt__(self, other):
  69. return mycmp(self.obj, other.obj) > 0
  70. def __eq__(self, other):
  71. return mycmp(self.obj, other.obj) == 0
  72. def __le__(self, other):
  73. return mycmp(self.obj, other.obj) <= 0
  74. def __ge__(self, other):
  75. return mycmp(self.obj, other.obj) >= 0
  76. def __ne__(self, other):
  77. return mycmp(self.obj, other.obj) != 0
  78. return K
  79. def simplify_roles(roles):
  80. # Most packages have lots of roles, so we need to simplify visually
  81. # Rules:
  82. # * 'main' hides 'extra_*' and 'other_*'
  83. if 'main' in roles:
  84. for role in roles.keys():
  85. if role.startswith('extra_') or role.startswith('other_'):
  86. del roles[role]
  87. # * 'extra' hides 'other_*'
  88. if 'extra' in roles:
  89. for role in roles.keys():
  90. if role.startswith('other_'):
  91. del roles[role]
  92. # * 'xxx_builddep' hides 'xxx_builddep_dep' (same value of xxx and same SRPM)
  93. for role1 in ['main_builddep', 'extra_builddep', 'other_builddep']:
  94. if role1 in roles.keys():
  95. role2 = role1 + '_dep'
  96. if role2 in roles.keys():
  97. # remove from role2's SRPMs those that are in role1
  98. roles[role2] = [x for x in roles[role2] if x not in roles[role1]]
  99. # remove role2 entirely if empty
  100. if not roles[role2]:
  101. del roles[role2]
  102. # * anything hides something that ends in "_indirect_builddep"
  103. if [x for x in roles if not x.endswith('_indirect_builddep')]:
  104. for role in [x for x in roles if x.endswith('_indirect_builddep')]:
  105. del roles[role]
  106. # * anything that does not begin with 'other' hides 'other*'
  107. if [x for x in roles if not x.startswith('other')]:
  108. for role in roles.keys():
  109. if role.startswith('other'):
  110. del roles[role]
  111. def main():
  112. parser = argparse.ArgumentParser(description='Format reports about XCP-ng RPMs')
  113. parser.add_argument('version', help='XCP-ng 2-digit version, e.g. 8.0')
  114. parser.add_argument('basedir', help='path to the base directory where repos must be present and where '
  115. 'we\'ll read data from.')
  116. format_choices = ['csv', 'markdown', 'html']
  117. parser.add_argument('format', help='output format: %s.' % " or ".join(format_choices), choices=format_choices)
  118. args = parser.parse_args()
  119. format = args.format
  120. shorten_output = format != 'csv'
  121. elaborate_output = format != 'csv'
  122. base_dir = os.path.abspath(check_dir(args.basedir))
  123. xcp_version = args.version
  124. xcp_srpm_repo = check_dir(os.path.join(base_dir, 'xcp-ng', xcp_version))
  125. xcp_rpm_repo = check_dir(os.path.join(base_dir, 'xcp-ng_rpms', xcp_version))
  126. work_dir = check_dir(os.path.join(base_dir, 'workdir', xcp_version))
  127. reports_dir = os.path.join(work_dir, 'reports')
  128. if not os.path.exists(reports_dir):
  129. os.mkdir(reports_dir)
  130. # Read data from workdir
  131. with open(os.path.join(work_dir, 'xcp-ng_builds.json')) as f:
  132. xcp_builds = json.load(f)
  133. with open(os.path.join(work_dir, 'xcp-ng_rpms.json')) as f:
  134. xcp_rpms = json.load(f)
  135. role_priority = [
  136. 'main',
  137. 'main_builddep',
  138. 'main_builddep_dep',
  139. 'main_indirect_builddep',
  140. 'extra',
  141. 'extra_dep',
  142. 'extra_builddep',
  143. 'extra_builddep_dep',
  144. 'extra_indirect_builddep',
  145. 'other_builddep',
  146. 'other_builddep_dep',
  147. 'other_indirect_builddep',
  148. 'other_dep'
  149. ]
  150. srpm_fields_ref = {
  151. # key: label
  152. 'srpm_name': 'SRPM name',
  153. 'repo': 'repo',
  154. 'version': 'version',
  155. 'centos_version': 'CentOS version',
  156. 'epel_version': 'EPEL version',
  157. 'summary': 'summary',
  158. 'built_by': 'built by',
  159. 'added_by': 'added by',
  160. 'import_reason': 'import reason',
  161. 'main_role': 'main role',
  162. 'provenance': 'provenance',
  163. 'roles': 'main roles',
  164. 'direct_build_deps': 'direct build deps',
  165. 'rpms': 'rpms',
  166. }
  167. srpm_reports_ref = {
  168. 'roles_and_deps': [
  169. 'srpm_name',
  170. 'repo',
  171. 'version',
  172. 'built_by',
  173. 'added_by',
  174. 'import_reason',
  175. 'main_role',
  176. 'roles',
  177. 'direct_build_deps',
  178. 'rpms'
  179. ],
  180. 'versions': [
  181. 'srpm_name',
  182. 'summary',
  183. 'repo',
  184. 'version',
  185. 'centos_version',
  186. 'epel_version',
  187. 'built_by',
  188. 'added_by',
  189. 'import_reason',
  190. 'main_role',
  191. 'roles'
  192. ]
  193. }
  194. srpm_reports = {}
  195. for report_name in srpm_reports_ref:
  196. srpm_reports[report_name] = []
  197. # data
  198. for srpm_nvr, build_info in xcp_builds.iteritems():
  199. repo = build_info['koji_tag'][len('v%s-' % xcp_version):]
  200. srpm_name = build_info['name']
  201. summary = build_info['summary']
  202. built_by = build_info['built-by']
  203. added_by = build_info.get('added_by', '').lower()
  204. import_reason = build_info.get('import_reason', '')
  205. # roles
  206. simplify_roles(build_info['roles'])
  207. main_role = None
  208. roles_list = []
  209. for role in role_priority:
  210. if role in build_info['roles']:
  211. if main_role is None:
  212. main_role = role
  213. roles_list.append(format_role(xcp_builds, xcp_rpms, role, build_info['roles'][role],
  214. max_entries=5 if shorten_output else None))
  215. roles = "\n".join(roles_list)
  216. if main_role is None:
  217. main_role = 'None'
  218. # build deps are present only for packages built by XCP-ng
  219. direct_build_deps = ""
  220. if 'build-deps' in build_info:
  221. direct_build_deps_list = [xcp_rpms[rpm_nvra]['name'] for rpm_nvra in build_info['build-deps'][0]]
  222. if shorten_output and len(direct_build_deps_list) > 10:
  223. direct_build_deps_list = direct_build_deps_list[:10] + ['...']
  224. direct_build_deps = " ".join(direct_build_deps_list)
  225. # rpms
  226. rpms_list = [xcp_rpms[rpm_nvra]['name'] for rpm_nvra in build_info['rpms']]
  227. if shorten_output and len(rpms_list) > 10:
  228. rpms_list = rpms_list[:10] + ['...']
  229. rpms = " ".join(rpms_list)
  230. # versions: highest version in bold display
  231. # note: voluntarily avoiding epoch in version comparisons because we might have different epochs
  232. version = build_info['version'] + '-' + build_info['release']
  233. nvr_tuple = ('1', build_info['version'], build_info['release'])
  234. if 'latest-centos' in build_info:
  235. centos_version = build_info['latest-centos']['version'] + '-' + build_info['latest-centos']['release']
  236. centos_nvr_tuple = ('1', build_info['latest-centos']['version'], build_info['latest-centos']['release'])
  237. else:
  238. centos_version = ""
  239. centos_nvr_tuple = ('0', '0', '0')
  240. if 'latest-epel' in build_info:
  241. epel_version = build_info['latest-epel']['version'] + '-' + build_info['latest-epel']['release']
  242. epel_nvr_tuple = ('1', build_info['latest-epel']['version'], build_info['latest-epel']['release'])
  243. else:
  244. epel_version = ""
  245. epel_nvr_tuple = ('0', '0', '0')
  246. max_nvr_tuple = max([nvr_tuple, centos_nvr_tuple, epel_nvr_tuple], key=cmp_to_key(rpm.labelCompare))
  247. if max_nvr_tuple == nvr_tuple:
  248. if (centos_version or epel_version) and max_nvr_tuple not in [epel_nvr_tuple, centos_nvr_tuple]:
  249. version = '**%s**' % version
  250. elif max_nvr_tuple == centos_nvr_tuple:
  251. centos_version = '**%s**' % centos_version
  252. elif max_nvr_tuple == epel_nvr_tuple:
  253. epel_version = '**%s**' % epel_version
  254. # add data to reports
  255. for report_name, report in srpm_reports.iteritems():
  256. row = []
  257. for field in srpm_reports_ref[report_name]:
  258. if field == 'srpm_name':
  259. if elaborate_output:
  260. value = "[%s](%s)" % (srpm_name, KOJI_BUILD_URL % urllib.quote(srpm_nvr))
  261. else:
  262. value = srpm_name
  263. elif field == 'repo':
  264. value = repo
  265. elif field == 'version':
  266. value = version
  267. elif field == 'centos_version':
  268. if centos_version and elaborate_output:
  269. value = "[%s](%s)" % (centos_version, CENTOS_RPM_URL % srpm_name)
  270. else:
  271. value = centos_version
  272. elif field == 'epel_version':
  273. if epel_version and elaborate_output:
  274. value = "[%s](%s)" % (epel_version, EPEL_RPM_URL % srpm_name)
  275. else:
  276. value = epel_version
  277. elif field == 'summary':
  278. value = summary
  279. elif field == 'built_by':
  280. value = built_by
  281. elif field == 'added_by':
  282. value= added_by
  283. elif field == 'import_reason':
  284. value= import_reason
  285. elif field == 'main_role':
  286. value = main_role
  287. elif field == 'roles':
  288. value = roles
  289. elif field == 'direct_build_deps':
  290. value = direct_build_deps
  291. elif field == 'rpms':
  292. value = rpms
  293. else:
  294. raise Exception("Couldn't handle field '%s'" % field)
  295. row.append(value)
  296. report.append(row)
  297. # sort rows in reports
  298. built_by_order = [
  299. 'xcp-ng',
  300. 'centos',
  301. 'epel',
  302. 'xs',
  303. 'unknown'
  304. ]
  305. role_priority.append('None')
  306. for report_name, report in srpm_reports.iteritems():
  307. headers = srpm_reports_ref[report_name]
  308. def custom_cmp(row1, row2):
  309. role_index = headers.index('main_role')
  310. if role_priority.index(row1[role_index]) > role_priority.index(row2[role_index]):
  311. return 1
  312. if role_priority.index(row1[role_index]) < role_priority.index(row2[role_index]):
  313. return -1
  314. built_by_index = headers.index('built_by')
  315. if built_by_order.index(row1[built_by_index]) > built_by_order.index(row2[built_by_index]):
  316. return 1
  317. if built_by_order.index(row1[built_by_index]) < built_by_order.index(row2[built_by_index]):
  318. return -1
  319. name_index = headers.index('srpm_name')
  320. return cmp([row1[name_index]], [row2[name_index]])
  321. report.sort(cmp=custom_cmp)
  322. # add header
  323. report.insert(0, [srpm_fields_ref[field] for field in srpm_reports_ref[report_name]])
  324. # format and write output
  325. if format == 'csv':
  326. with codecs.open(os.path.join(reports_dir, 'report_%s.csv' % report_name), 'w', encoding='utf8') as f:
  327. for row in report:
  328. row = [field.replace('\n', ' - ') for field in row]
  329. f.write(';'.join(row) + '\n')
  330. elif format in ['markdown', 'html']:
  331. s = StringIO.StringIO()
  332. s.write(' | '.join(report[0]) + '\n')
  333. separator = '-'
  334. for i in xrange(len(report[0]) - 1):
  335. separator += ' | -'
  336. s.write(separator + '\n')
  337. for row in report[1:]:
  338. row = [field.replace('\n', '<br>') for field in row]
  339. s.write(' | '.join(row) + '\n')
  340. try:
  341. if format == 'markdown':
  342. with codecs.open(os.path.join(reports_dir, 'report_%s.md' % report_name), 'w', encoding='utf8') as f:
  343. f.write(s.getvalue())
  344. elif format == 'html':
  345. with codecs.open(os.path.join(reports_dir, 'report_%s.html' % report_name), 'w', encoding='utf8') as f:
  346. f.write("""
  347. <!DOCTYPE html>
  348. <html lang="en">
  349. <head>
  350. <meta charset="utf-8"/>
  351. <style>
  352. table {
  353. border-width: 1px;
  354. border-collapse: collapse;
  355. }
  356. td, th {
  357. font-size: 0.75em;
  358. border-width: 1px;
  359. border-color: silver;
  360. border-style: solid;
  361. padding: 2px;
  362. }
  363. </style>
  364. </head>
  365. """)
  366. f.write(markdown.markdown(s.getvalue(), extensions=['tables']))
  367. script = """
  368. <script>
  369. var table = document.getElementsByTagName('table')[0];
  370. var tbody = table.getElementsByTagName('tbody')[0];
  371. var cells = tbody.getElementsByTagName('td');
  372. for (var i=0, len=cells.length; i<len; i++){
  373. """
  374. script += js_color_cell_values([v for v in role_priority if v.startswith('main')], 'green')
  375. script += js_color_cell_values([v for v in role_priority if v.startswith('extra')], 'blue')
  376. script += js_color_cell_values([v for v in role_priority if v.startswith('other')] + ['None'], 'tomato')
  377. script += js_color_cell_values(['updates'], 'blue')
  378. script += js_color_cell_values(['candidates'], 'orangered')
  379. script += js_color_cell_values(['testing'], 'orangered')
  380. script += js_color_cell_values(['ci'], 'orangered')
  381. script += js_color_cell_values(['xcp-ng'], '#263740')
  382. script += js_color_cell_values(['centos'], 'sienna')
  383. script += js_color_cell_values(['epel'], 'orchid')
  384. script += js_color_cell_values(['xs'], 'tomato')
  385. script += js_color_cell_values(['unknown'], 'red')
  386. script += """
  387. }
  388. </script>
  389. </html>
  390. """
  391. f.write(script)
  392. else:
  393. raise("Unexpected.")
  394. finally:
  395. s.close()
  396. else:
  397. raise("Oops, I don't know how to handle format '%s'." % format)
# Generate the reports only when run as a script, not when imported.
if __name__ == "__main__":
    main()