codeowners_hint.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. """
  2. Copyright (c) Contributors to the Open 3D Engine Project.
  3. For complete copyright and license terms please see the LICENSE at the root of this distribution.
  4. SPDX-License-Identifier: Apache-2.0 OR MIT
  5. Tools for inspecting GitHub CODEOWNERS files
  6. """
  7. import argparse
  8. import logging
  9. import os
  10. import pathlib
  11. import re
  12. logger = logging.getLogger(__name__)
  13. _DEFAULT_CODEOWNER_ALIAS = "https://www.o3de.org/community/"
  14. _GITHUB_CODEOWNERS_BYTE_LIMIT = 3 * 1024 * 1024 # 3MB
  15. def get_codeowners(target_path: pathlib.PurePath) -> (str|None, str|None, pathlib.PurePath|None):
  16. """
  17. Finds ownership information matching the target filesystem path from a CODEOWNERS file found in its GitHub repo
  18. :param target_path: path to match in a GitHub CODEOWNERS file, which will be discovered inside its repo
  19. :return: tuple of (matched_path_entry, matched_owner_aliases, found_codeowners_path) which are empty when missing
  20. """
  21. codeowners_path = find_github_codeowners(target_path)
  22. if codeowners_path:
  23. matched_path, owner_aliases = get_codeowners_from(target_path, codeowners_path)
  24. else:
  25. matched_path = ""
  26. owner_aliases = ""
  27. codeowners_path = ""
  28. return matched_path, owner_aliases, codeowners_path
  29. def find_github_codeowners(target_path: pathlib.PurePath) -> pathlib.Path|None:
  30. """
  31. Finds the '.github/CODEOWNERS' file for the git repo containing target_path, scanning upward through the filesystem
  32. :param target_path: a path expected to exist in a GitHub repository containing a CODEOWNERS file
  33. :return: path to the CODEOWNERS file, or None if no file could be located
  34. """
  35. current_path = target_path
  36. for _ in range(1000):
  37. codeowners_path = os.path.join(current_path, ".github", "CODEOWNERS")
  38. if os.path.exists(codeowners_path):
  39. return pathlib.Path(codeowners_path)
  40. next_path = os.path.dirname(current_path)
  41. if next_path == current_path:
  42. break # reached filesystem root
  43. current_path = next_path
  44. logger.warning(f"No GitHub CODEOWNERS file found in a GitHub repo which contains {target_path}")
  45. return None
  46. def get_codeowners_from(target_path: pathlib.PurePath, codeowners_path: pathlib.PurePath) -> (str, str):
  47. """
  48. Fetch ownership information matching the target filesystem path from a CODEOWNERS file
  49. :param target_path: path to match in the GitHub CODEOWNERS file
  50. :param codeowners_path: path to CODEOWNERS file
  51. :return: tuple of (matched_path_entry, matched_owner_aliases), which will be empty when nothing was matched.
  52. The aliases will also be empty when a matched path is explicitly unowned.
  53. """
  54. if not os.path.isfile(codeowners_path):
  55. logger.warning(f"No GitHub CODEOWNERS file found at {codeowners_path}")
  56. return "", ""
  57. if os.path.getsize(codeowners_path) > _GITHUB_CODEOWNERS_BYTE_LIMIT:
  58. logger.warning(f"GitHub CODEOWNERS file found at {codeowners_path} exceeds the standard limit of "
  59. f"{_GITHUB_CODEOWNERS_BYTE_LIMIT} bytes")
  60. return "", ""
  61. # operate only on unix-style separators
  62. repo_root = pathlib.PurePosixPath(codeowners_path.parent.parent)
  63. unix_normalized_target = pathlib.PurePosixPath(target_path)
  64. if not unix_normalized_target.is_relative_to(repo_root):
  65. logger.warning(f"Path '{target_path}' is not inside the repo of GitHub CODEOWNERS file {codeowners_path}")
  66. return "", ""
  67. repo_relative_target = unix_normalized_target.relative_to(repo_root)
  68. repo_rooted_target = pathlib.PurePosixPath("/" + str(repo_relative_target)) # relative_to removes leading slash
  69. with open(codeowners_path) as codeowners_file:
  70. # GitHub syntax only applies the final matching rule ==> parse in reverse order and take first match
  71. for line in reversed(list(codeowners_file)):
  72. clean_line = line.strip()
  73. if clean_line and not clean_line.startswith('#'): # ignore blanks and full-line comments
  74. # entry format should be "owned/path/ @alias1 @alias2 user@example.com @aliasN..."
  75. split_entry = line.split(maxsplit=1)
  76. owned_path = split_entry[0]
  77. if _codeowners_path_matches(repo_rooted_target, owned_path):
  78. if len(split_entry) > 1:
  79. aliases = split_entry[1].split("#", maxsplit=1)[0].strip() # remove trailing comment
  80. else: # explicitly unowned entry with no comment
  81. aliases = ""
  82. return owned_path, aliases
  83. # else invalid entry syntax, ignore
  84. return "", "" # no match found
  85. def _codeowners_path_matches(target_path: pathlib.PurePosixPath, owned_path: str) -> bool:
  86. """
  87. :param target_path: PurePosixPath to match against, which starts from the root of the repo
  88. :param owned_path: path identifier found in a GitHub CODEOWNERS file (relative to root, may contain wildcards)
  89. :return: True when target_path is contained by the rules of owned_path
  90. """
  91. matched = False
  92. if '*' in owned_path or '?' in owned_path: # wildcards require glob matching
  93. if owned_path.startswith("*"): # special simple case for global wildcards
  94. matched = target_path.match(owned_path)
  95. elif owned_path.startswith("/"): # ownership of specific directory: glob A against B
  96. matched = target_path.match(owned_path[1:])
  97. else: # ownership of all relative directories: find non-wildcard portions of B in A, glob the remainders
  98. asterisk = owned_path.find("*")
  99. question = owned_path.find("?")
  100. if asterisk > -1 and question > -1:
  101. first_wildcard_index = min(asterisk, question)
  102. else: # avoid not-found index
  103. first_wildcard_index = max(asterisk, question)
  104. separator_indices = [index.start() for index in re.finditer(pattern="/", string=owned_path)]
  105. pre_wildcard_separator_index = 0
  106. for s_index in separator_indices:
  107. if s_index < first_wildcard_index:
  108. pre_wildcard_separator_index = s_index
  109. else: # remainder are all greater
  110. break
  111. # separate non-wildcard-containing path from remainder
  112. pre_wildcard_owned = owned_path[:pre_wildcard_separator_index]
  113. wildcard_with_remainder_owned = owned_path[pre_wildcard_separator_index+1:]
  114. # find substrings of initial portion of B within A
  115. target_str = str(target_path)
  116. pre_wildcard_target_end_indices = [index.end() for index in
  117. re.finditer(pattern=pre_wildcard_owned, string=target_str)]
  118. # glob remainders of A against remainder of B
  119. for target_index in pre_wildcard_target_end_indices: # may be multiple substring matches within target
  120. target_remainder = target_str[target_index:]
  121. if pathlib.PurePosixPath(target_remainder).match(wildcard_with_remainder_owned):
  122. matched = True
  123. break # exit early on success
  124. else: # simple path matching
  125. if owned_path.startswith("/"): # ownership of specific directory: verify if A exists inside B
  126. matched = target_path.is_relative_to(owned_path)
  127. else: # ownership of all relative directories: verify if B is a substring of A
  128. matched = owned_path in str(target_path)
  129. return matched
  130. def _pretty_print_success(print_fn, found_codeowners_path, matched_path, owner_aliases) -> None:
  131. """
  132. Prints a friendly message, instead of the default terse output of owner alias(es)
  133. :param print_fn: function to call when logging strings
  134. :param found_codeowners_path: verified path to a GitHub CODEOWNERS file
  135. :param matched_path: first part of an entry matched in the CODEOWNERS file
  136. :param owner_aliases: second part of an entry in a CODEOWNERS file
  137. """
  138. print_fn(f"Matched '{matched_path}' in file {found_codeowners_path}")
  139. print_fn(f"For additional support please reach out to: {owner_aliases}")
  140. def _pretty_print_failure(print_fn, found_codeowners_path, matched_path, original_target,
  141. default_alias=_DEFAULT_CODEOWNER_ALIAS) -> None:
  142. """
  143. Prints a friendly message about failure to find an owner
  144. :param print_fn: function to call when logging strings
  145. :param found_codeowners_path: verified path to a GitHub CODEOWNERS file which, empty when missing
  146. :param matched_path: entry matched in the CODEOWNERS file, empty when not matched
  147. :param original_target: the path which matching was attempted on
  148. :param default_alias: who to contact as no owner was found
  149. """
  150. if not found_codeowners_path:
  151. print_fn(f"No GitHub CODEOWNERS file was found for '{original_target}'")
  152. else:
  153. if not matched_path:
  154. print_fn(f"No ownership information for '{original_target}' found in file {found_codeowners_path}")
  155. else:
  156. print_fn(f"Ownership for '{matched_path}' is explicitly empty in file {found_codeowners_path}")
  157. print_fn(f"For additional support please reach out to: {default_alias}")
  158. def _main() -> int:
  159. parser = argparse.ArgumentParser(description="Display GitHub CODEOWNERS information to stdout for a target path")
  160. parser.add_argument('target', metavar='T', type=pathlib.Path,
  161. help="file path to find an owner for")
  162. parser.add_argument('-c', '--codeowners', type=pathlib.Path,
  163. help="path to a GitHub CODEOWNERS file, when not set this will scan upward to find the repo "
  164. "containing the target")
  165. parser.add_argument('-d', '--default_alias', default=_DEFAULT_CODEOWNER_ALIAS,
  166. help="a default location to reach out for support, for when ownership cannot be determined")
  167. parser.add_argument('-p', '--pretty_print', action='store_true',
  168. help="output ownership info as a friendly message instead of only alias(es)")
  169. parser.add_argument('-s', '--silent', action='store_true',
  170. help="Suppress any warning messages and only print ownership information")
  171. args = parser.parse_args()
  172. if args.silent:
  173. logging.disable()
  174. else:
  175. logging.basicConfig()
  176. if args.codeowners:
  177. matched_path, owner_aliases = get_codeowners_from(args.target, args.codeowners)
  178. found_codeowners = os.path.isfile(args.codeowners)
  179. else:
  180. matched_path, owner_aliases, found_codeowners = get_codeowners(args.target)
  181. if owner_aliases and matched_path and found_codeowners:
  182. if args.pretty_print:
  183. _pretty_print_success(print, found_codeowners, matched_path, owner_aliases)
  184. else:
  185. print(owner_aliases)
  186. return 0
  187. else:
  188. if args.pretty_print:
  189. _pretty_print_failure(print, found_codeowners, matched_path, args.target, args.default_alias)
  190. else:
  191. print(args.default_alias)
  192. return 1
  193. logger.error("Unexpected abnormal exit")
  194. return -1