123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601 |
- #!/usr/bin/env python
- # Copy of https://github.com/llvm-mirror/clang/blob/master/tools/clang-format/git-clang-format
- # Adds a --diffstat option to show the files needing formatting.
- # This change will be upstreamed, but the current git-clang-format does not
- # have it yet. We use it in the internal scripts/clang-format.sh
- #
- #===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
- #
- # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- # See https://llvm.org/LICENSE.txt for license information.
- # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- #
- #===------------------------------------------------------------------------===#
- r"""
- clang-format git integration
- ============================
- This file provides a clang-format integration for git. Put it somewhere in your
- path and ensure that it is executable. Then, "git clang-format" will invoke
- clang-format on the changes in current files or a specific commit.
- For further details, run:
- git clang-format -h
- Requires Python 2.7 or Python 3
- """
- from __future__ import absolute_import, division, print_function
- import argparse
- import collections
- import contextlib
- import errno
- import os
- import re
- import subprocess
- import sys
# Synopsis handed to argparse in place of its auto-generated usage line.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description handed to argparse; main() pairs it with
# RawDescriptionHelpFormatter.
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.
If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.
The following git-config settings set the default of the corresponding option:
clangFormat.binary
clangFormat.commit
clangFormat.extensions
clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'

# A contiguous run of lines: 1-based `start` line and the number of lines
# (`count`) it spans, as parsed from a unified-diff hunk header.
Range = collections.namedtuple('Range', 'start, count')
def main():
    """Parse the command line, run clang-format on changed lines, and report.

    Defaults for --binary, --commit, --extensions and --style come from the
    git configuration. Depending on the options, the formatting result is
    printed as a diff (--diff), printed as a diffstat (--diffstat), or applied
    to the working directory. Exits through die() on invalid argument
    combinations.
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hh', 'hpp', 'hxx',  # C++
        'cu',  # CUDA
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
        'cs',  # C Sharp
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--diffstat', action='store_true',
                   help='print diffstat instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present. However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these files')
    opts = p.parse_args(argv)

    # Fold -q into -v so a single signed verbosity level drives all output.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    # Check the upper bound first: nested under the "one commit" branch this
    # test could never fire.
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) == 2 and not opts.diff:
        die('--diff is required when two commits are given')

    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print(' %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print(' %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return

    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    if len(commits) > 1:
        # Two commits: format the file contents stored in the second commit.
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        # Zero or one commit: format the files in the working directory.
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)

    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    elif opts.diffstat:
        print_diffstat(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print(' %s' % filename)
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless named in
    `non_string_options`, a dictionary mapping option name (in lower case) to
    either "--bool" or "--int"; those are re-read through
    `git config <type-flag> <name>`.

    A key that git lists without any value is recorded as 'true'.
    """
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        # With --null each record is "<name>\n<value>". A key set without
        # '=' is emitted as the bare name, which would break a two-way unpack.
        name, separator, value = entry.partition('\n')
        if not separator:
            value = 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out
def interpret_args(args, dash_dash, default_commit):
    """Split `args` of the form "[commits] [--] [files]" into (commits, files).

    The caller has already moved "--" and everything after it out of `args`
    and into `dash_dash`.

    With a non-empty `dash_dash`, every entry of `args` must be a commit (or
    tag) and the files are whatever followed "--". Without it, leading
    entries of `args` are consumed as commits for as long as they resolve to
    revisions; the rest are files. When no commit is found,
    `[default_commit]` is used.
    """
    if dash_dash:
        revisions = args if len(args) != 0 else [default_commit]
        for rev in revisions:
            kind = get_object_type(rev)
            if kind in ('commit', 'tag'):
                continue
            if kind is None:
                die("'%s' is not a commit" % rev)
            else:
                die("'%s' is a %s, but a commit was expected" % (rev, kind))
        return revisions, dash_dash[1:]

    if not args:
        return [default_commit], []

    # Peel revisions off the front of `args`; what remains are file names.
    revisions = []
    while args and disambiguate_revision(args[0]):
        revisions.append(args.pop(0))
    if not revisions:
        revisions = [default_commit]
    return revisions, args
def disambiguate_revision(value):
    """Return True if `value` names a revision, False if it is a file.

    Dies when `value` is ambiguous or names a git object that is neither a
    commit nor a tag.
    """
    # If `value` is ambiguous (neither a commit nor a file), the following
    # command will die with an appropriate error message.
    run('git', 'rev-parse', value, verbose=False)
    kind = get_object_type(value)
    if kind in ('commit', 'tag'):
        return True
    if kind is None:
        return False
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, kind))
def get_object_type(value):
    """Return the type name `git cat-file -t` reports for `value`.

    Returns None when the command exits with a non-zero status, i.e. `value`
    is not a valid git object.
    """
    probe = subprocess.Popen(['git', 'cat-file', '-t', value],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured only to keep git's complaint off the terminal.
    output, _ = probe.communicate()
    if probe.returncode == 0:
        return convert_string(output.strip())
    return None
def compute_diff_and_extract_lines(commits, files):
    """Run compute_diff() and feed its output through extract_lines().

    Exits with status 2 if the diff process fails.
    """
    differ = compute_diff(commits, files)
    line_map = extract_lines(differ.stdout)
    differ.stdout.close()
    if differ.wait() != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return line_map
def compute_diff(commits, files):
    """Start a git process producing the diff for `commits`.

    The returned process's `stdout` yields a patch with the differences
    between the working directory and the commit when a single commit is
    given (`git diff-index`), or between the given commits otherwise
    (`git diff-tree`), restricted to `files` if non-empty. Zero context lines
    are used in the patch. The process's stdin is already closed.
    """
    tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
    argv = ['git', tool, '-p', '-U0'] + commits
    argv.append('--')
    argv.extend(files)
    differ = subprocess.Popen(argv, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    differ.stdin.close()
    return differ
def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of patch lines. The input must have been
    produced with ``-U0``, meaning unidiff format with zero lines of context.

    Returns a dict mapping filename to a list of line `Range`s
    (start_line, line_count). Hunks that add no lines are skipped.
    """
    file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
    hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
    matches = {}
    filename = None
    for line in patch_file:
        line = convert_string(line)
        match = file_header.search(line)
        if match:
            # git terminates names containing whitespace with a TAB; strip it
            # along with the line ending so the key is the real path.
            filename = match.group(1).rstrip('\r\n\t')
        match = hunk_header.search(line)
        if match and filename is not None:
            start_line = int(match.group(1))
            # An omitted count in a hunk header means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(
                    Range(start_line, line_count))
    return matches
def filter_by_extension(dictionary, allowed_extensions):
    """Remove, in place, every key of `dictionary` lacking an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. The extension is the text after the last '.' in
    the key and is compared case-insensitively. Keys without any '.' are
    kept only when the empty string is among the allowed extensions.
    """
    allowed = frozenset(allowed_extensions)
    keep_extensionless = '' in allowed
    rejected = []
    for path in dictionary:
        _, dot, extension = path.rpartition('.')
        if dot:
            if extension.lower() not in allowed:
                rejected.append(path)
        elif not keep_extensionless:
            rejected.append(path)
    # Delete after the scan so the dictionary is not resized mid-iteration.
    for path in rejected:
        del dictionary[path]
def cd_to_toplevel():
    """Make the git repository's top-level directory the working directory."""
    os.chdir(run('git', 'rev-parse', '--show-toplevel'))
def create_tree_from_workdir(filenames):
    """Build a git tree holding `filenames` as found in the working directory.

    Returns the object ID (SHA-1) of the created tree.
    """
    tree_id = create_tree(filenames, '--stdin')
    return tree_id
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Format every file in `changed_lines` and store the results in a git tree.

    `changed_lines` maps each filename to the line ranges to format. When
    `revision` is set, file modes and contents are read from that revision;
    otherwise they come from the working directory.

    Returns the object ID (SHA-1) of the created tree.
    """
    def file_mode(path):
        # Produce the file mode as an octal string with a single leading '0'.
        if revision:
            ls_tree_cmd = ['git', 'ls-tree',
                           '%s:%s' % (revision, os.path.dirname(path)),
                           os.path.basename(path)]
            ls_tree = subprocess.Popen(ls_tree_cmd, stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            listing = ls_tree.communicate()[0]
            # The first whitespace-separated field is read as the octal mode.
            text = oct(int(listing.split()[0], 8))
        else:
            text = oct(os.stat(path).st_mode)
        # Adjust python3 octal format so that it matches what git expects
        if text.startswith('0o'):
            text = '0' + text[2:]
        return text

    def index_entries():
        # One "<mode> <blob>\t<path>" record per formatted file.
        for path in changed_lines:
            mode = file_mode(path)
            blob_id = clang_format_to_blob(path, changed_lines[path],
                                           revision=revision,
                                           binary=binary,
                                           style=style)
            yield '%s %s\t%s' % (mode, blob_id, path)

    return create_tree(index_entries(), '--index-info')
def create_tree(input_lines, mode):
    """Create a tree object from the given input and return its object ID.

    With mode '--stdin', `input_lines` must be an iterable of filenames. With
    mode '--index-info' it must yield records suitable for
    "git update-index --index-info", such as
    "<mode> <SP> <sha1> <TAB> <filename>". Any other mode is invalid.

    All work happens against a temporary index file. Dies if
    `git update-index` fails.
    """
    assert mode in ('--stdin', '--index-info')
    update_cmd = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        updater = subprocess.Popen(update_cmd, stdin=subprocess.PIPE)
        feed = updater.stdin
        # Records are NUL-terminated to match the -z flag.
        for record in input_lines:
            feed.write(to_bytes('%s\0' % record))
        feed.close()
        status = updater.wait()
        if status != 0:
            die('`%s` failed' % ' '.join(update_cmd))
        # Must run while GIT_INDEX_FILE still points at the temporary index.
        return run('git', 'write-tree')
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Format one file with clang-format and store the output as a git blob.

    The input is the file's content in `revision` when that is not None
    (piped from `git cat-file blob`), or the file in the working directory
    otherwise. Only the lines covered by `line_ranges` are passed to
    clang-format via `-lines=`.

    Returns the object ID (SHA-1) of the created blob. Dies if clang-format
    cannot be found or if any of the involved commands fails.
    """
    format_cmd = [binary]
    if style:
        format_cmd.append('-style=' + style)
    for start_line, line_count in line_ranges:
        # -lines takes an inclusive start:end pair.
        format_cmd.append(
            '-lines=%s:%s' % (start_line, start_line + line_count - 1))

    if revision:
        format_cmd.append('-assume-filename=' + filename)
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        format_input = git_show.stdout
    else:
        format_cmd.append(filename)
        git_show = None
        format_input = subprocess.PIPE

    try:
        formatter = subprocess.Popen(format_cmd, stdin=format_input,
                                     stdout=subprocess.PIPE)
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    if git_show is None:
        # No upstream producer: the pipe to close is clang-format's own stdin.
        format_input = formatter.stdin
    format_input.close()

    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hasher = subprocess.Popen(hash_object_cmd, stdin=formatter.stdout,
                              stdout=subprocess.PIPE)
    formatter.stdout.close()
    blob_output = hasher.communicate()[0]

    # Check the pipeline from its tail back to its head.
    if hasher.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if formatter.wait() != 0:
        die('`%s` failed' % ' '.join(format_cmd))
    if git_show and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return convert_string(blob_output).rstrip('\r\n')
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager pointing GIT_INDEX_FILE at a temporary index.

    The temporary index is created from `tree` (or empty when `tree` is
    None). On exit the previous GIT_INDEX_FILE value is restored, or the
    variable removed if it was unset, and the temporary file is deleted.
    """
    index_path = create_temporary_index(tree)
    saved_value = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if saved_value is not None:
            os.environ['GIT_INDEX_FILE'] = saved_value
        else:
            del os.environ['GIT_INDEX_FILE']
        os.remove(index_path)
def create_temporary_index(tree=None):
    """Write a temporary index file inside the git directory; return its path.

    The index is populated from `tree` when given; otherwise an empty index
    is created.
    """
    git_dir = run('git', 'rev-parse', '--git-dir')
    index_path = os.path.join(git_dir, temp_index_basename)
    source = '--empty' if tree is None else tree
    run('git', 'read-tree', '--index-output=' + index_path, source)
    return index_path
def print_diff(old_tree, new_tree):
    """Show the diff from `old_tree` to `new_tree` on stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
    subprocess.check_call(diff_cmd)
def print_diffstat(old_tree, new_tree):
    """Show the diffstat from `old_tree` to `new_tree` on stdout."""
    # We use the porcelain 'diff' and not plumbing 'diff-tree' because the
    # output is expected to be viewed by the user, and only the former does
    # nice things like color and pagination.
    #
    # We also only print modified files since `new_tree` only contains the
    # files that were modified, so unmodified files would show as deleted
    # without the filter.
    stat_cmd = ['git', 'diff', '--diff-filter=M', '--stat', old_tree,
                new_tree, '--']
    subprocess.check_call(stat_cmd)
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails (exit status 2) if any file to be modified has unstaged changes,
    unless `force` is set. If `patch_mode`, runs `git checkout --patch` to
    select hunks interactively.

    Returns the list of files that differ between `old_tree` and `new_tree`.
    """
    name_output = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                      '--name-only', old_tree, new_tree)
    # Dropping empty fields removes the trailing NUL terminator and keeps an
    # empty result from turning into [''] (an invalid pathspec).
    changed_files = [name for name in name_output.split('\0') if name]
    if not force and changed_files:
        # '--' keeps file names that start with '-' from being read as options.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            print('The following files would be modified but '
                  'have unstaged changes:', file=sys.stderr)
            print(unstaged_files, file=sys.stderr)
            print('Please commit, stage, or stash them first.',
                  file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree". Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files
def run(*args, **kwargs):
    """Run the command `args` and return its stdout as a string.

    Keyword arguments:
      stdin:   data to feed to the command's standard input (default: none).
      verbose: if true (default), report stderr output and failures on stderr.
      strip:   if true (default), strip trailing CR/LF from the returned text.

    Exits the program with status 2 if the command returns non-zero.
    Raises TypeError for any other keyword argument.
    """
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         stdin=subprocess.PIPE)
    # The pipes are binary, so text input must be encoded before it is sent;
    # Python 3 rejects a non-empty str here.
    stdout, stderr = p.communicate(input=to_bytes(stdin))
    stdout = convert_string(stdout)
    stderr = convert_string(stderr)
    if p.returncode == 0:
        if stderr:
            if verbose:
                print('`%s` printed to stderr:' % ' '.join(args),
                      file=sys.stderr)
            print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip('\r\n')
        return stdout
    if verbose:
        print('`%s` returned %s' % (' '.join(args), p.returncode),
              file=sys.stderr)
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)
def die(message):
    """Print `message` to stderr prefixed with 'error:' and exit with status 2."""
    print('error: %s' % (message,), file=sys.stderr)
    raise SystemExit(2)
def to_bytes(str_input):
    """Return `str_input` as bytes, UTF-8 encoding it unless it already is bytes."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode('utf-8')
def to_string(bytes_input):
    """Return `bytes_input` as the interpreter's native `str` type.

    A native `str` is returned unchanged. Python 3 `bytes` are decoded from
    UTF-8; any other text type (Python 2 `unicode`) is encoded to UTF-8.
    """
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3, where bytes is not str and has no
        # .encode(); decoding is what yields a str there.
        return bytes_input.decode('utf-8')
    return bytes_input.encode('utf-8')
- def convert_string(bytes_input):
- try:
- return to_string(bytes_input.decode('utf-8'))
- except AttributeError: # 'str' object has no attribute 'decode'.
- return str(bytes_input)
- except UnicodeError:
- return str(bytes_input)
# Run main() only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
|