make_changelog.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. from __future__ import annotations
  2. # Allow direct execution
  3. import os
  4. import sys
  5. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  6. import enum
  7. import itertools
  8. import json
  9. import logging
  10. import re
  11. from collections import defaultdict
  12. from dataclasses import dataclass
  13. from functools import lru_cache
  14. from pathlib import Path
  15. from devscripts.utils import read_file, run_process, write_file
# Root URL used when building commit, issue and author links
BASE_URL = 'https://github.com'
# Directory containing this script; the default override/CONTRIBUTORS paths are derived from it
LOCATION_PATH = Path(__file__).parent
# Number of leading characters kept when a commit hash is abbreviated
HASH_LENGTH = 7
# Module-level logger; configured in `create_changelog`
logger = logging.getLogger(__name__)
  20. class CommitGroup(enum.Enum):
  21. PRIORITY = 'Important'
  22. CORE = 'Core'
  23. EXTRACTOR = 'Extractor'
  24. DOWNLOADER = 'Downloader'
  25. POSTPROCESSOR = 'Postprocessor'
  26. NETWORKING = 'Networking'
  27. MISC = 'Misc.'
  28. @classmethod
  29. @lru_cache
  30. def subgroup_lookup(cls):
  31. return {
  32. name: group
  33. for group, names in {
  34. cls.MISC: {
  35. 'build',
  36. 'ci',
  37. 'cleanup',
  38. 'devscripts',
  39. 'docs',
  40. 'test',
  41. },
  42. cls.NETWORKING: {
  43. 'rh',
  44. },
  45. }.items()
  46. for name in names
  47. }
  48. @classmethod
  49. @lru_cache
  50. def group_lookup(cls):
  51. result = {
  52. 'fd': cls.DOWNLOADER,
  53. 'ie': cls.EXTRACTOR,
  54. 'pp': cls.POSTPROCESSOR,
  55. 'upstream': cls.CORE,
  56. }
  57. result.update({item.name.lower(): item for item in iter(cls)})
  58. return result
  59. @classmethod
  60. def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
  61. group, _, subgroup = (group.strip().lower() for group in value.partition('/'))
  62. if result := cls.group_lookup().get(group):
  63. return result, subgroup or None
  64. if subgroup:
  65. return None, value
  66. return cls.subgroup_lookup().get(group), group or None
  67. @dataclass
  68. class Commit:
  69. hash: str | None
  70. short: str
  71. authors: list[str]
  72. def __str__(self):
  73. result = f'{self.short!r}'
  74. if self.hash:
  75. result += f' ({self.hash[:HASH_LENGTH]})'
  76. if self.authors:
  77. authors = ', '.join(self.authors)
  78. result += f' by {authors}'
  79. return result
  80. @dataclass
  81. class CommitInfo:
  82. details: str | None
  83. sub_details: tuple[str, ...]
  84. message: str
  85. issues: list[str]
  86. commit: Commit
  87. fixes: list[Commit]
  88. def key(self):
  89. return ((self.details or '').lower(), self.sub_details, self.message)
  90. def unique(items):
  91. return sorted({item.strip().lower(): item for item in items if item}.values())
  92. class Changelog:
  93. MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
  94. ALWAYS_SHOWN = (CommitGroup.PRIORITY,)
  95. def __init__(self, groups, repo, collapsible=False):
  96. self._groups = groups
  97. self._repo = repo
  98. self._collapsible = collapsible
  99. def __str__(self):
  100. return '\n'.join(self._format_groups(self._groups)).replace('\t', ' ')
  101. def _format_groups(self, groups):
  102. first = True
  103. for item in CommitGroup:
  104. if self._collapsible and item not in self.ALWAYS_SHOWN and first:
  105. first = False
  106. yield '\n<details><summary><h3>Changelog</h3></summary>\n'
  107. if group := groups[item]:
  108. yield self.format_module(item.value, group)
  109. if self._collapsible:
  110. yield '\n</details>'
  111. def format_module(self, name, group):
  112. result = f'\n#### {name} changes\n' if name else '\n'
  113. return result + '\n'.join(self._format_group(group))
  114. def _format_group(self, group):
  115. sorted_group = sorted(group, key=CommitInfo.key)
  116. detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
  117. for _, items in detail_groups:
  118. items = list(items)
  119. details = items[0].details
  120. if details == 'cleanup':
  121. items = self._prepare_cleanup_misc_items(items)
  122. prefix = '-'
  123. if details:
  124. if len(items) == 1:
  125. prefix = f'- **{details}**:'
  126. else:
  127. yield f'- **{details}**'
  128. prefix = '\t-'
  129. sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
  130. for sub_details, entries in sub_detail_groups:
  131. if not sub_details:
  132. for entry in entries:
  133. yield f'{prefix} {self.format_single_change(entry)}'
  134. continue
  135. entries = list(entries)
  136. sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
  137. if len(entries) == 1:
  138. yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
  139. continue
  140. yield sub_prefix
  141. for entry in entries:
  142. yield f'\t{prefix} {self.format_single_change(entry)}'
  143. def _prepare_cleanup_misc_items(self, items):
  144. cleanup_misc_items = defaultdict(list)
  145. sorted_items = []
  146. for item in items:
  147. if self.MISC_RE.search(item.message):
  148. cleanup_misc_items[tuple(item.commit.authors)].append(item)
  149. else:
  150. sorted_items.append(item)
  151. for commit_infos in cleanup_misc_items.values():
  152. sorted_items.append(CommitInfo(
  153. 'cleanup', ('Miscellaneous',), ', '.join(
  154. self._format_message_link(None, info.commit.hash)
  155. for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
  156. [], Commit(None, '', commit_infos[0].commit.authors), []))
  157. return sorted_items
  158. def format_single_change(self, info: CommitInfo):
  159. message, sep, rest = info.message.partition('\n')
  160. if '[' not in message:
  161. # If the message doesn't already contain markdown links, try to add a link to the commit
  162. message = self._format_message_link(message, info.commit.hash)
  163. if info.issues:
  164. message = f'{message} ({self._format_issues(info.issues)})'
  165. if info.commit.authors:
  166. message = f'{message} by {self._format_authors(info.commit.authors)}'
  167. if info.fixes:
  168. fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)
  169. authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
  170. if authors != info.commit.authors:
  171. fix_message = f'{fix_message} by {self._format_authors(authors)}'
  172. message = f'{message} (With fixes in {fix_message})'
  173. return message if not sep else f'{message}{sep}{rest}'
  174. def _format_message_link(self, message, commit_hash):
  175. assert message or commit_hash, 'Improperly defined commit message or override'
  176. message = message if message else commit_hash[:HASH_LENGTH]
  177. return f'[{message}]({self.repo_url}/commit/{commit_hash})' if commit_hash else message
  178. def _format_issues(self, issues):
  179. return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)
  180. @staticmethod
  181. def _format_authors(authors):
  182. return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)
  183. @property
  184. def repo_url(self):
  185. return f'{BASE_URL}/{self._repo}'
  186. class CommitRange:
  187. COMMAND = 'git'
  188. COMMIT_SEPARATOR = '-----'
  189. AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
  190. MESSAGE_RE = re.compile(r'''
  191. (?:\[(?P<prefix>[^\]]+)\]\ )?
  192. (?:(?P<sub_details>`?[\w.-]+`?): )?
  193. (?P<message>.+?)
  194. (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
  195. ''', re.VERBOSE | re.DOTALL)
  196. EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
  197. REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
  198. FIXES_RE = re.compile(r'(?i:(?:bug\s*)?fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Improve)\s+([\da-f]{40})')
  199. UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')
  200. def __init__(self, start, end, default_author=None):
  201. self._start, self._end = start, end
  202. self._commits, self._fixes = self._get_commits_and_fixes(default_author)
  203. self._commits_added = []
  204. def __iter__(self):
  205. return iter(itertools.chain(self._commits.values(), self._commits_added))
  206. def __len__(self):
  207. return len(self._commits) + len(self._commits_added)
  208. def __contains__(self, commit):
  209. if isinstance(commit, Commit):
  210. if not commit.hash:
  211. return False
  212. commit = commit.hash
  213. return commit in self._commits
  214. def _get_commits_and_fixes(self, default_author):
  215. result = run_process(
  216. self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
  217. f'{self._start}..{self._end}' if self._start else self._end).stdout
  218. commits, reverts = {}, {}
  219. fixes = defaultdict(list)
  220. lines = iter(result.splitlines(False))
  221. for i, commit_hash in enumerate(lines):
  222. short = next(lines)
  223. skip = short.startswith('Release ') or short == '[version] update'
  224. fix_commitish = None
  225. if match := self.FIXES_RE.search(short):
  226. fix_commitish = match.group(1)
  227. authors = [default_author] if default_author else []
  228. for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
  229. if match := self.AUTHOR_INDICATOR_RE.match(line):
  230. authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)
  231. if not fix_commitish and (match := self.FIXES_RE.fullmatch(line)):
  232. fix_commitish = match.group(1)
  233. commit = Commit(commit_hash, short, authors)
  234. if skip and (self._start or not i):
  235. logger.debug(f'Skipped commit: {commit}')
  236. continue
  237. elif skip:
  238. logger.debug(f'Reached Release commit, breaking: {commit}')
  239. break
  240. if match := self.REVERT_RE.fullmatch(commit.short):
  241. reverts[match.group(1)] = commit
  242. continue
  243. if fix_commitish:
  244. fixes[fix_commitish].append(commit)
  245. commits[commit.hash] = commit
  246. for commitish, revert_commit in reverts.items():
  247. if reverted := commits.pop(commitish, None):
  248. logger.debug(f'{commitish} fully reverted {reverted}')
  249. else:
  250. commits[revert_commit.hash] = revert_commit
  251. for commitish, fix_commits in fixes.items():
  252. if commitish in commits:
  253. hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
  254. logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
  255. for fix_commit in fix_commits:
  256. del commits[fix_commit.hash]
  257. else:
  258. logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')
  259. return commits, fixes
  260. def apply_overrides(self, overrides):
  261. for override in overrides:
  262. when = override.get('when')
  263. if when and when not in self and when != self._start:
  264. logger.debug(f'Ignored {when!r} override')
  265. continue
  266. override_hash = override.get('hash') or when
  267. if override['action'] == 'add':
  268. commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
  269. logger.info(f'ADD {commit}')
  270. self._commits_added.append(commit)
  271. elif override['action'] == 'remove':
  272. if override_hash in self._commits:
  273. logger.info(f'REMOVE {self._commits[override_hash]}')
  274. del self._commits[override_hash]
  275. elif override['action'] == 'change':
  276. if override_hash not in self._commits:
  277. continue
  278. commit = Commit(override_hash, override['short'], override.get('authors') or [])
  279. logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
  280. self._commits[commit.hash] = commit
  281. self._commits = dict(reversed(self._commits.items()))
  282. def groups(self):
  283. group_dict = defaultdict(list)
  284. for commit in self:
  285. upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
  286. if upstream_re:
  287. commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'
  288. match = self.MESSAGE_RE.fullmatch(commit.short)
  289. if not match:
  290. logger.error(f'Error parsing short commit message: {commit.short!r}')
  291. continue
  292. prefix, sub_details_alt, message, issues = match.groups()
  293. issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []
  294. if prefix:
  295. groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
  296. group = next(iter(filter(None, groups)), None)
  297. details = ', '.join(unique(details))
  298. sub_details = list(itertools.chain.from_iterable(sub_details))
  299. else:
  300. group = CommitGroup.CORE
  301. details = None
  302. sub_details = []
  303. if sub_details_alt:
  304. sub_details.append(sub_details_alt)
  305. sub_details = tuple(unique(sub_details))
  306. if not group:
  307. if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
  308. group = CommitGroup.EXTRACTOR
  309. logger.error(f'Assuming [ie] group for {commit.short!r}')
  310. else:
  311. group = CommitGroup.CORE
  312. commit_info = CommitInfo(
  313. details, sub_details, message.strip(),
  314. issues, commit, self._fixes[commit.hash])
  315. logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
  316. group_dict[group].append(commit_info)
  317. return group_dict
  318. @staticmethod
  319. def details_from_prefix(prefix):
  320. if not prefix:
  321. return CommitGroup.CORE, None, ()
  322. prefix, *sub_details = prefix.split(':')
  323. group, details = CommitGroup.get(prefix)
  324. if group is CommitGroup.PRIORITY and details:
  325. details = details.partition('/')[2].strip()
  326. if details and '/' in details:
  327. logger.error(f'Prefix is overnested, using first part: {prefix}')
  328. details = details.partition('/')[0].strip()
  329. if details == 'common':
  330. details = None
  331. elif group is CommitGroup.NETWORKING and details == 'rh':
  332. details = 'Request Handler'
  333. return group, details, sub_details
  334. def get_new_contributors(contributors_path, commits):
  335. contributors = set()
  336. if contributors_path.exists():
  337. for line in read_file(contributors_path).splitlines():
  338. author, _, _ = line.strip().partition(' (')
  339. authors = author.split('/')
  340. contributors.update(map(str.casefold, authors))
  341. new_contributors = set()
  342. for commit in commits:
  343. for author in commit.authors:
  344. author_folded = author.casefold()
  345. if author_folded not in contributors:
  346. contributors.add(author_folded)
  347. new_contributors.add(author)
  348. return sorted(new_contributors, key=str.casefold)
  349. def create_changelog(args):
  350. logging.basicConfig(
  351. datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
  352. level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)
  353. commits = CommitRange(None, args.commitish, args.default_author)
  354. if not args.no_override:
  355. if args.override_path.exists():
  356. overrides = json.loads(read_file(args.override_path))
  357. commits.apply_overrides(overrides)
  358. else:
  359. logger.warning(f'File {args.override_path.as_posix()} does not exist')
  360. logger.info(f'Loaded {len(commits)} commits')
  361. if new_contributors := get_new_contributors(args.contributors_path, commits):
  362. if args.contributors:
  363. write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
  364. logger.info(f'New contributors: {", ".join(new_contributors)}')
  365. return Changelog(commits.groups(), args.repo, args.collapsible)
  366. def create_parser():
  367. import argparse
  368. parser = argparse.ArgumentParser(
  369. description='Create a changelog markdown from a git commit range')
  370. parser.add_argument(
  371. 'commitish', default='HEAD', nargs='?',
  372. help='The commitish to create the range from (default: %(default)s)')
  373. parser.add_argument(
  374. '-v', '--verbosity', action='count', default=0,
  375. help='increase verbosity (can be used twice)')
  376. parser.add_argument(
  377. '-c', '--contributors', action='store_true',
  378. help='update CONTRIBUTORS file (default: %(default)s)')
  379. parser.add_argument(
  380. '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
  381. help='path to the CONTRIBUTORS file')
  382. parser.add_argument(
  383. '--no-override', action='store_true',
  384. help='skip override json in commit generation (default: %(default)s)')
  385. parser.add_argument(
  386. '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
  387. help='path to the changelog_override.json file')
  388. parser.add_argument(
  389. '--default-author', default='pukkandan',
  390. help='the author to use without a author indicator (default: %(default)s)')
  391. parser.add_argument(
  392. '--repo', default='yt-dlp/yt-dlp',
  393. help='the github repository to use for the operations (default: %(default)s)')
  394. parser.add_argument(
  395. '--collapsible', action='store_true',
  396. help='make changelog collapsible (default: %(default)s)')
  397. return parser
# Script entry point: parse the command line and print the rendered changelog to stdout
if __name__ == '__main__':
    print(create_changelog(create_parser().parse_args()))