#!/usr/bin/env python
#
#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
11
r"""
clang-format git integration
============================

This file provides a clang-format integration for git. Put it somewhere in your
path and ensure that it is executable. Then, "git clang-format" will invoke
clang-format on the changes in current files or a specific commit.

For further details, run:
git clang-format -h

Requires Python 2.7
"""
25
import argparse
import collections
import contextlib
import errno
import os
import re
import subprocess
import sys

# Usage line shown by argparse in place of its auto-generated one.
usage = 'git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]'

# Long description shown by `-h`. The git-config names listed here must match
# the keys that main() looks up in the loaded configuration (which are the
# lower-cased forms, e.g. 'clangformat.extensions').
desc = '''
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

The following git-config settings set the default of the corresponding option:
 clangFormat.binary
 clangFormat.commit
 clangFormat.extensions
 clangFormat.style
'''

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: `start` is the first line number and `count` is the
# number of lines in the run.
Range = collections.namedtuple('Range', 'start, count')
58
59
def main():
    """Entry point: format the lines changed relative to one or two commits.

    Reads defaults from the git configuration, parses the command line,
    computes the changed line ranges per file, runs clang-format on them and
    stores the result in a git tree. Depending on the options the difference
    between the unformatted and formatted trees is either printed (--diff) or
    applied to the working directory.

    Exits through die() when more than two commits are given, or when two
    commits are given without --diff.
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present. However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level is used below.
    opts.verbose -= opts.quiet
    del opts.quiet

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    # Check the upper bound first: it used to live in the `else` of the
    # "more than one commit" test, where it could never fire.
    if len(commits) > 2:
        die('at most two commits allowed; %d given' % len(commits))
    if len(commits) > 1 and not opts.diff:
        die('--diff is required when two commits are given')

    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    if opts.verbose >= 1:
        # Whatever filter_by_extension() removed is what was ignored.
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('  %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('  %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return

    # The file names taken from the diff are used to open files below, so we
    # must cd to the top level of the repository before accessing them.
    cd_to_toplevel()
    if len(commits) > 1:
        # Two commits: format the files as they exist in the second commit
        # and compare against that commit.
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     revision=commits[1],
                                                     binary=opts.binary,
                                                     style=opts.style)
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                     binary=opts.binary,
                                                     style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)

    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('  %s' % filename)
180
181
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless they are listed in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int"; such options are re-read through
    `git config <type> <name>` so that git normalizes the value.

    A variable that is set without a value (which git lists as a bare name
    with no newline/value part) is reported as the string 'true', matching
    git's rule that a valueless variable is shorthand for boolean true.
    Previously such an entry made the name/value split raise ValueError.
    """
    if non_string_options is None:
        non_string_options = {}
    out = {}
    # With --null every entry is terminated by NUL and the name is separated
    # from the value by a newline, so values may themselves contain newlines.
    for entry in run('git', 'config', '--list', '--null').split('\0'):
        if not entry:
            continue
        name, separator, value = entry.partition('\n')
        if not separator:
            value = 'true'
        if name in non_string_options:
            value = run('git', 'config', non_string_options[name], name)
        out[name] = value
    return out
198
199
def interpret_args(args, dash_dash, default_commit):
    """Interpret `args` as "[commits] [--] [files]" and return (commits, files).

    It is assumed that "--" and everything that follows has been removed from
    args and placed in `dash_dash`.

    If "--" is present (i.e., `dash_dash` is non-empty), the arguments to its
    left (if present) are taken as commits, and each of them must name a
    commit or tag. Otherwise, the arguments are checked from left to right if
    they are commits or files. If commits are not given, a list with
    `default_commit` is used.

    `args` is not modified; the returned lists are new objects.
    """
    if dash_dash:
        commits = list(args) if args else [default_commit]
        for commit in commits:
            object_type = get_object_type(commit)
            if object_type not in ('commit', 'tag'):
                if object_type is None:
                    die("'%s' is not a commit" % commit)
                else:
                    die("'%s' is a %s, but a commit was expected" %
                        (commit, object_type))
        files = dash_dash[1:]
    elif args:
        # Leading arguments that resolve to revisions are commits; the first
        # one that does not (and everything after it) is a file.
        split = 0
        while split < len(args) and disambiguate_revision(args[split]):
            split += 1
        commits = list(args[:split]) or [default_commit]
        files = list(args[split:])
    else:
        commits = [default_commit]
        files = []
    return commits, files

237
def disambiguate_revision(value):
    """Tell whether `value` names a revision (True) or a file (False).

    Terminates the program when `value` is neither, or when it names a git
    object that is not a commit or tag.
    """
    # When `value` is ambiguous (neither a commit nor a file), this command
    # fails and run() exits after relaying git's error output.
    run('git', 'rev-parse', value, verbose=False)
    kind = get_object_type(value)
    if kind in ('commit', 'tag'):
        return True
    if kind is None:
        return False
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, kind))
250
251
def get_object_type(value):
    """Return the type name git reports for `value`, or None.

    The type is obtained from `git cat-file -t`; None is returned when that
    command exits with a non-zero status, i.e. `value` is not a valid git
    object."""
    query = subprocess.Popen(['git', 'cat-file', '-t', value],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    # stderr is captured only to keep git's complaint off the terminal.
    output, _ = query.communicate()
    return output.strip() if query.returncode == 0 else None
261
262
def compute_diff_and_extract_lines(commits, files):
    """Run compute_diff() and feed its output to extract_lines().

    Returns the mapping produced by extract_lines(). Exits with status 2 if
    the diff process fails."""
    differ = compute_diff(commits, files)
    lines_by_file = extract_lines(differ.stdout)
    differ.stdout.close()
    if differ.wait() != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return lines_by_file
273
274
def compute_diff(commits, files):
    """Start git producing the diff for `commits` and return the process.

    The returned subprocess object's `stdout` file object yields a patch with
    the differences between the working directory and the commit if a single
    one was specified (via `git diff-index`), or between both specified
    commits (via `git diff-tree`), filtered on `files` (if non-empty). Zero
    context lines are used in the patch."""
    tool = 'diff-tree' if len(commits) > 1 else 'diff-index'
    argv = ['git', tool, '-p', '-U0']
    argv.extend(commits)
    argv.append('--')
    argv.extend(files)
    differ = subprocess.Popen(argv, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE)
    # Nothing is ever sent to the diff process.
    differ.stdin.close()
    return differ
290
291
def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of text lines. The input must have been
    produced with ``-U0``, meaning unidiff format with zero lines of context.

    The return value is a dict mapping filename to a list of line `Range`s
    (start_line, line_count) taken from the "+" side of every hunk header.
    Hunks that add no lines (count 0) are omitted, and so is any hunk header
    that appears before a "+++" file header has been seen (previously that
    raised an unbound-variable error)."""
    # Compiled once instead of on every input line.
    file_header = re.compile(r'^\+\+\+\ [^/]+/(.*)')
    hunk_header = re.compile(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?')
    matches = {}
    filename = None
    for line in patch_file:
        match = file_header.search(line)
        if match:
            filename = match.group(1).rstrip('\r\n')
        match = hunk_header.search(line)
        if not match or filename is None:
            continue
        start_line = int(match.group(1))
        # An omitted count in a hunk header means exactly one line.
        line_count = int(match.group(3)) if match.group(3) else 1
        if line_count > 0:
            matches.setdefault(filename, []).append(
                Range(start_line, line_count))
    return matches
315
316
def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period. The extension of a key is the text after its last
    period, compared case-insensitively; keys without a period are deleted.
    `dictionary` is modified in place and nothing is returned."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys: entries are deleted inside the
    # loop, which is not allowed while iterating the dictionary itself.
    for filename in list(dictionary):
        _, dot, extension = filename.rpartition('.')
        if not dot or extension.lower() not in allowed_extensions:
            del dictionary[filename]
327
328
def cd_to_toplevel():
    """Make the top level of the git repository the current directory."""
    os.chdir(run('git', 'rev-parse', '--show-toplevel'))
333
334
def create_tree_from_workdir(filenames):
    """Build a git tree holding `filenames` as found in the working directory.

    Returns the object ID (SHA-1) of the created tree."""
    tree_id = create_tree(filenames, '--stdin')
    return tree_id
340
341
def run_clang_format_and_save_to_tree(changed_lines, revision=None,
                                      binary='clang-format', style=None):
    """Format every file in `changed_lines` and store the results in a tree.

    `changed_lines` maps filename to the line ranges to format; `revision`,
    `binary` and `style` are forwarded to clang_format_to_blob().

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        # One "<mode> <blob>\t<path>" record per formatted file.
        for path, ranges in changed_lines.items():
            # Octal file mode with a leading zero, taken from the file in
            # the working directory.
            file_mode = '0%o' % os.stat(path).st_mode
            blob = clang_format_to_blob(path, ranges, revision=revision,
                                        binary=binary, style=style)
            yield '%s %s\t%s' % (file_mode, blob, path)

    return create_tree(index_info_generator(), '--index-info')
356
357
def create_tree(input_lines, mode):
    """Create a tree object from the given input and return its object ID.

    If mode is '--stdin', `input_lines` must be an iterable of filenames. If
    mode is '--index-info' it must be an iterable of values suitable for
    "git update-index --index-info", such as
    "<mode> <SP> <sha1> <TAB> <filename>". Any other mode is invalid."""
    assert mode in ('--stdin', '--index-info')
    update_index = ['git', 'update-index', '--add', '-z', mode]
    # Everything below operates on a throw-away index file, so write-tree
    # has to run before the context manager removes it again.
    with temporary_index_file():
        updater = subprocess.Popen(update_index, stdin=subprocess.PIPE)
        for record in input_lines:
            # -z: records are NUL-terminated.
            updater.stdin.write('%s\0' % record)
        updater.stdin.close()
        status = updater.wait()
        if status != 0:
            die('`%s` failed' % ' '.join(update_index))
        return run('git', 'write-tree')
376
377
def clang_format_to_blob(filename, line_ranges, revision=None,
                         binary='clang-format', style=None):
    """Format `filename` with clang-format and store the output as a git blob.

    The content comes from `revision` if it is given, and otherwise from the
    file in the working directory. Only the lines in `line_ranges` (pairs of
    start line and line count) are formatted.

    Returns the object ID (SHA-1) of the created blob."""
    format_argv = [binary]
    if style:
        format_argv.append('-style=' + style)
    for first, count in line_ranges:
        format_argv.append('-lines=%s:%s' % (first, first + count - 1))

    if revision:
        # Feed the committed content through a pipe; clang-format then needs
        # to be told which file name the content belongs to.
        format_argv.append('-assume-filename=' + filename)
        git_show_cmd = ['git', 'cat-file', 'blob',
                        '%s:%s' % (revision, filename)]
        git_show = subprocess.Popen(git_show_cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        git_show.stdin.close()
        format_input = git_show.stdout
    else:
        # clang-format reads the working-directory file itself.
        format_argv.append(filename)
        git_show = None
        format_input = subprocess.PIPE

    try:
        formatter = subprocess.Popen(format_argv, stdin=format_input,
                                     stdout=subprocess.PIPE)
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    else:
        if format_input == subprocess.PIPE:
            format_input = formatter.stdin
    # Our copy of clang-format's input is no longer needed in this process.
    format_input.close()

    hash_object_cmd = ['git', 'hash-object', '-w', '--path=' + filename,
                       '--stdin']
    hasher = subprocess.Popen(hash_object_cmd, stdin=formatter.stdout,
                              stdout=subprocess.PIPE)
    formatter.stdout.close()
    blob_id = hasher.communicate()[0]
    if hasher.returncode != 0:
        die('`%s` failed' % ' '.join(hash_object_cmd))
    if formatter.wait() != 0:
        die('`%s` failed' % ' '.join(format_argv))
    if git_show is not None and git_show.wait() != 0:
        die('`%s` failed' % ' '.join(git_show_cmd))
    return blob_id.rstrip('\r\n')
426
427
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager that points GIT_INDEX_FILE at a temporary index.

    The index is created from `tree` by create_temporary_index(). On exit the
    previous GIT_INDEX_FILE setting (or its absence) is restored and the
    temporary file is deleted."""
    index_path = create_temporary_index(tree)
    saved = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = index_path
    try:
        yield
    finally:
        if saved is not None:
            os.environ['GIT_INDEX_FILE'] = saved
        else:
            del os.environ['GIT_INDEX_FILE']
        os.remove(index_path)
443
444
def create_temporary_index(tree=None):
    """Write a temporary index file into the git directory; return its path.

    The index is populated from `tree` when one is given, and is left empty
    when `tree` is None."""
    git_dir = run('git', 'rev-parse', '--git-dir')
    index_path = os.path.join(git_dir, temp_index_basename)
    source = '--empty' if tree is None else tree
    run('git', 'read-tree', '--index-output=' + index_path, source)
    return index_path
456
457
def print_diff(old_tree, new_tree):
    """Show the user the differences between `old_tree` and `new_tree`."""
    # The porcelain 'diff' is used rather than the plumbing 'diff-tree'
    # because the output is meant to be read by the user, and only the
    # former does nice things like color and pagination.
    #
    # Only modified files are shown: `new_tree` contains just the files that
    # were formatted, so without the filter every other file would be listed
    # as deleted.
    diff_cmd = ['git', 'diff', '--diff-filter=M', old_tree, new_tree, '--']
    subprocess.check_call(diff_cmd)

470
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails (exit status 2) if there are unstaged changes in the affected files
    and not `force`. If `patch_mode`, runs `git checkout --patch` to select
    hunks interactively.

    Returns the list of files that differ between `old_tree` and
    `new_tree`."""
    changed_files = run('git', 'diff-tree', '--diff-filter=M', '-r', '-z',
                        '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        # '--' keeps a file name that looks like an option from being
        # interpreted as one.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            sys.stderr.write('The following files would be modified but '
                             'have unstaged changes:\n')
            sys.stderr.write('%s\n' % unstaged_files)
            sys.stderr.write('Please commit, stage, or stash them first.\n')
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree". Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree".
        # This is not quite right, since it won't be applied to the user's
        # index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files
501
502
def run(*args, **kwargs):
    """Run the command `args` and return what it wrote to stdout.

    Keyword arguments:
      stdin:   text sent to the command's standard input (default '').
      verbose: when true (the default), print a line naming the command
               before relaying its stderr output or reporting its failure.
      strip:   when true (the default), trailing carriage returns and
               newlines are removed from the returned output.

    Anything the command writes to stderr is relayed to our stderr. If the
    command exits with a non-zero status the program exits with status 2.
    Raises TypeError for any other keyword argument."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    if kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" %
                        next(iter(kwargs)))

    child = subprocess.Popen(args, stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    out, err = child.communicate(input=stdin)

    if child.returncode != 0:
        if verbose:
            sys.stderr.write('`%s` returned %s\n' %
                             (' '.join(args), child.returncode))
        if err:
            sys.stderr.write('%s\n' % err.rstrip())
        sys.exit(2)

    if err:
        if verbose:
            sys.stderr.write('`%s` printed to stderr:\n' % ' '.join(args))
        sys.stderr.write('%s\n' % err.rstrip())
    return out.rstrip('\r\n') if strip else out
525
526
def die(message):
    """Write `message`, prefixed with "error:", to stderr; exit with 2."""
    sys.stderr.write('error: %s\n' % (message,))
    sys.exit(2)
530
531
# Run the tool only when this file is executed as a script.
if __name__ == '__main__':
    main()