blob: 860548e71eb39690f8ff6655184a9c59c906855f [file] [log] [blame]
Ed Schoutenbf041d92014-09-02 20:59:13 +00001#!/usr/bin/env python
Daniel Jaspere7a50012013-05-23 17:53:42 +00002#
3#===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===#
4#
5# The LLVM Compiler Infrastructure
6#
7# This file is distributed under the University of Illinois Open Source
8# License. See LICENSE.TXT for details.
9#
10#===------------------------------------------------------------------------===#
11
12r"""
13clang-format git integration
14============================
15
16This file provides a clang-format integration for git. Put it somewhere in your
17path and ensure that it is executable. Then, "git clang-format" will invoke
18clang-format on the changes in current files or a specific commit.
19
20For further details, run:
21git clang-format -h
22
23Requires Python 2.7
24"""
25
26import argparse
27import collections
28import contextlib
29import errno
30import os
31import re
32import subprocess
33import sys
34
# One-line synopsis shown by argparse in place of its generated usage string.
usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]'

# Long description shown by `-h`. The git-config keys listed here must match
# the `clangformat.*` names that main() looks up as option defaults.
desc = '''
Run clang-format on all lines that differ between the working directory
and <commit>, which defaults to HEAD. Changes are only applied to the working
directory.

The following git-config settings set the default of the corresponding option:
 clangFormat.binary
 clangFormat.commit
 clangFormat.extensions
 clangFormat.style
'''

# Name of the temporary index file in which to save the output of
# clang-format. This file is created within the .git directory.
temp_index_basename = 'clang-format-index'


# A run of changed lines: `start` is the first line number of the run and
# `count` is the number of lines in it.
Range = collections.namedtuple('Range', 'start, count')
55
56
def main():
    """Entry point: run clang-format on the lines that differ from <commit>.

    Option defaults are read from the git configuration, the command line is
    parsed, the changed line ranges are computed and filtered by file
    extension, and clang-format is run on what remains. The result is either
    printed as a diff (--diff) or applied to the working directory."""
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves. (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index('--')
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ','.join([
        # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
        'c', 'h',  # C
        'm',  # ObjC
        'mm',  # ObjC++
        'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp',  # C++
        # Other languages that clang-format supports
        'proto', 'protodevel',  # Protocol Buffers
        'java',  # Java
        'js',  # JavaScript
        'ts',  # TypeScript
    ])

    p = argparse.ArgumentParser(
        usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc)
    p.add_argument('--binary',
                   default=config.get('clangformat.binary', 'clang-format'),
                   help='path to clang-format')
    p.add_argument('--commit',
                   default=config.get('clangformat.commit', 'HEAD'),
                   help='default commit to use if none is specified')
    p.add_argument('--diff', action='store_true',
                   help='print a diff instead of applying the changes')
    p.add_argument('--extensions',
                   default=config.get('clangformat.extensions',
                                      default_extensions),
                   help=('comma-separated list of file extensions to format, '
                         'excluding the period and case-insensitive'))
    p.add_argument('-f', '--force', action='store_true',
                   help='allow changes to unstaged files')
    p.add_argument('-p', '--patch', action='store_true',
                   help='select hunks interactively')
    p.add_argument('-q', '--quiet', action='count', default=0,
                   help='print less information')
    p.add_argument('--style',
                   default=config.get('clangformat.style', None),
                   help='passed to clang-format')
    p.add_argument('-v', '--verbose', action='count', default=0,
                   help='print extra information')
    # We gather all the remaining positional arguments into 'args' since we
    # need to use some heuristics to determine whether or not <commit> was
    # present. However, to print pretty messages, we make use of metavar and
    # help.
    p.add_argument('args', nargs='*', metavar='<commit>',
                   help='revision from which to compute the diff')
    p.add_argument('ignored', nargs='*', metavar='<file>...',
                   help='if specified, only consider differences in these '
                        'files')
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    commit, files = interpret_args(opts.args, dash_dash, opts.commit)
    changed_lines = compute_diff_and_extract_lines(commit, files)
    if opts.verbose >= 1:
        # Snapshot the file names before filtering so that the ones dropped
        # for having the wrong extension can be reported.
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(','))
    # Messages are printed as a single pre-formatted string so that the
    # output is the same whether `print` is a statement or a function.
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print('Ignoring changes in the following files (wrong extension):')
            for filename in ignored_files:
                print('  %s' % filename)
        if changed_lines:
            print('Running clang-format on the following files:')
            for filename in changed_lines:
                print('  %s' % filename)
    if not changed_lines:
        print('no modified files to format')
        return
    # The computed diff outputs absolute paths, so we must cd before accessing
    # those files.
    cd_to_toplevel()
    old_tree = create_tree_from_workdir(changed_lines)
    new_tree = run_clang_format_and_save_to_tree(changed_lines,
                                                 binary=opts.binary,
                                                 style=opts.style)
    if opts.verbose >= 1:
        print('old tree: %s' % old_tree)
        print('new tree: %s' % new_tree)
    if old_tree == new_tree:
        if opts.verbose >= 0:
            print('clang-format did not modify any files')
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(old_tree, new_tree, force=opts.force,
                                      patch_mode=opts.patch)
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print('changed files:')
            for filename in changed_files:
                print('  %s' % filename)
164
165
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary of name -> value.

    Values are the strings reported by `git config --list`, except for names
    present in `non_string_options`: a dictionary mapping an option name (in
    lower case) to either "--bool" or "--int". Those options are read again
    through `git config` with the given type flag."""
    typed_options = {} if non_string_options is None else non_string_options
    config = {}
    listing = run('git', 'config', '--list', '--null')
    for record in listing.split('\0'):
        if not record:
            continue
        # Each record is the option name and its value separated by the
        # first newline.
        key, setting = record.split('\n', 1)
        if key in typed_options:
            setting = run('git', 'config', typed_options[key], key)
        config[key] = setting
    return config
182
183
def interpret_args(args, dash_dash, default_commit):
    """Split "[commit] [--] [files...]" into a (commit, files) pair.

    `args` holds the positionals found before "--"; "--" and everything after
    it must already have been moved into `dash_dash`.

    With "--" present (`dash_dash` non-empty), at most one argument may
    precede it and that argument must name a commit. Without "--", the first
    argument is examined to decide whether it is a commit or a file. Whenever
    no commit is given, `default_commit` is used."""
    if not dash_dash:
        # No separator: the first positional may be either a commit or a file.
        if not args:
            return default_commit, []
        if disambiguate_revision(args[0]):
            return args[0], args[1:]
        return default_commit, args

    # Explicit separator: anything on its left has to be a single commit.
    if not args:
        commit = default_commit
    elif len(args) > 1:
        die('at most one commit allowed; %d given' % len(args))
    else:
        commit = args[0]
        kind = get_object_type(commit)
        if kind is None:
            die("'%s' is not a commit" % commit)
        elif kind not in ('commit', 'tag'):
            die("'%s' is a %s, but a commit was expected" % (commit, kind))
    return commit, dash_dash[1:]
219
220
def disambiguate_revision(value):
    """Return True if `value` names a revision and False if it names a file.

    The process is terminated if `value` is neither, or if it names a git
    object that is not a commit or a tag."""
    # `git rev-parse` fails when `value` is ambiguous (neither a commit nor a
    # file); run() then exits after relaying git's error output.
    run('git', 'rev-parse', value, verbose=False)
    kind = get_object_type(value)
    if kind in ('commit', 'tag'):
        return True
    if kind is None:
        return False
    die('`%s` is a %s, but a commit or filename was expected' %
        (value, kind))
233
234
def get_object_type(value):
    """Return the type `git cat-file -t` reports for `value`, or None if the
    command fails (i.e. `value` is not a valid git object)."""
    proc = subprocess.Popen(['git', 'cat-file', '-t', value],
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # stderr is captured and discarded; only the exit status is consulted.
    type_name, _ = proc.communicate()
    return type_name.strip() if proc.returncode == 0 else None
244
245
def compute_diff_and_extract_lines(commit, files):
    """Run compute_diff() and feed its output to extract_lines().

    Exits with status 2 if the diff process reports failure."""
    proc = compute_diff(commit, files)
    ranges_by_file = extract_lines(proc.stdout)
    proc.stdout.close()
    if proc.wait() != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return ranges_by_file
256
257
def compute_diff(commit, files):
    """Start `git diff-index` against `commit` and return the subprocess.

    The returned object's `stdout` file object produces a patch with the
    differences between the working directory and `commit`, restricted to
    `files` when that is non-empty. The patch has zero context lines. The
    child's stdin is already closed when this function returns."""
    diff_cmd = ['git', 'diff-index', '-p', '-U0', commit, '--'] + list(files)
    proc = subprocess.Popen(diff_cmd, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    proc.stdin.close()
    return proc
269
270
def extract_lines(patch_file):
    """Collect the changed line ranges described by `patch_file`.

    `patch_file` is an iterable of patch lines. The input must have been
    produced with ``-U0``, meaning unidiff format with zero lines of context.

    The return value is a dict mapping filename to a list of line `Range`s
    (start_line, line_count). Hunks that add no lines are left out."""
    ranges_by_file = {}
    for text in patch_file:
        # "+++ <prefix>/<path>" introduces the file that later hunks refer to.
        header = re.search(r'^\+\+\+\ [^/]+/(.*)', text)
        if header:
            filename = header.group(1).rstrip('\r\n')
        hunk = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', text)
        if not hunk:
            continue
        first = int(hunk.group(1))
        # An omitted ",<count>" in the hunk header stands for a single line.
        count = int(hunk.group(3)) if hunk.group(3) else 1
        if count > 0:
            ranges_by_file.setdefault(filename, []).append(Range(first, count))
    return ranges_by_file
294
295
def filter_by_extension(dictionary, allowed_extensions):
    """Delete every key in `dictionary` that doesn't have an allowed extension.

    `dictionary` is modified in place; nothing is returned. The extension of
    a key is whatever follows its last period, compared case-insensitively. A
    key without any period is always deleted.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period."""
    allowed_extensions = frozenset(allowed_extensions)
    # Iterate over a snapshot of the keys: deleting entries while iterating
    # the dictionary's live key view raises RuntimeError on Python 3.
    for filename in list(dictionary):
        base_ext = filename.rsplit('.', 1)
        if len(base_ext) == 1 or base_ext[1].lower() not in allowed_extensions:
            del dictionary[filename]
306
307
def cd_to_toplevel():
    """Make the top level of the git repository the current directory."""
    os.chdir(run('git', 'rev-parse', '--show-toplevel'))
312
313
def create_tree_from_workdir(filenames):
    """Build a git tree from the working-directory copies of `filenames`.

    Returns the object ID (SHA-1) of the created tree."""
    # '--stdin' makes create_tree() treat its input as plain file names.
    return create_tree(input_lines=filenames, mode='--stdin')
319
320
def run_clang_format_and_save_to_tree(changed_lines, binary='clang-format',
                                      style=None):
    """Run clang-format on each file and save the result to a git tree.

    `changed_lines` maps each filename to the line ranges to format. `binary`
    and `style` are passed through to clang_format_to_blob().

    Returns the object ID (SHA-1) of the created tree."""
    def index_info_generator():
        # Yields one "<mode> <sha1>\t<filename>" record per formatted file.
        for filename, line_ranges in changed_lines.items():
            # Octal mode with a single leading zero, e.g. "0100644". This is
            # the text Python 2's oct() produces; Python 3's oct() would give
            # "0o100644", which does not match the <mode> field format git
            # expects.
            mode = '0%o' % os.stat(filename).st_mode
            blob_id = clang_format_to_blob(filename, line_ranges,
                                           binary=binary, style=style)
            yield '%s %s\t%s' % (mode, blob_id, filename)
    return create_tree(index_info_generator(), '--index-info')
333
334
def create_tree(input_lines, mode):
    """Create a tree object from the given input and return its object ID.

    If mode is '--stdin', `input_lines` must be a list of filenames. If mode
    is '--index-info', it must be a list of values suitable for "git
    update-index --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>".
    Any other mode is invalid."""
    assert mode in ('--stdin', '--index-info')
    update_index = ['git', 'update-index', '--add', '-z', mode]
    with temporary_index_file():
        proc = subprocess.Popen(update_index, stdin=subprocess.PIPE)
        # Because of -z, every record is terminated by a NUL byte.
        for record in input_lines:
            proc.stdin.write('%s\0' % record)
        proc.stdin.close()
        status = proc.wait()
        if status != 0:
            die('`%s` failed' % ' '.join(update_index))
        # write-tree has to run while GIT_INDEX_FILE still points at the
        # temporary index that was just populated.
        return run('git', 'write-tree')
353
354
def clang_format_to_blob(filename, line_ranges, binary='clang-format',
                         style=None):
    """Format `filename` with clang-format and store the output as a git blob.

    Only the lines covered by `line_ranges`, an iterable of (start_line,
    line_count) pairs, are passed to clang-format for formatting.

    Returns the object ID (SHA-1) of the created blob."""
    format_cmd = [binary, filename]
    if style:
        format_cmd.append('-style=' + style)
    for first, count in line_ranges:
        # -lines takes an inclusive first:last pair.
        format_cmd.append('-lines=%s:%s' % (first, first + count - 1))
    try:
        formatter = subprocess.Popen(format_cmd, stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        die('cannot find executable "%s"' % binary)
    formatter.stdin.close()
    # clang-format's output is piped straight into `git hash-object`.
    hash_cmd = ['git', 'hash-object', '-w', '--path=' + filename, '--stdin']
    hasher = subprocess.Popen(hash_cmd, stdin=formatter.stdout,
                              stdout=subprocess.PIPE)
    # Drop this process's copy of the pipe's read end; hash-object keeps its
    # own.
    formatter.stdout.close()
    blob_id = hasher.communicate()[0]
    if hasher.returncode != 0:
        die('`%s` failed' % ' '.join(hash_cmd))
    if formatter.wait() != 0:
        die('`%s` failed' % ' '.join(format_cmd))
    return blob_id.rstrip('\r\n')
385
386
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager that points GIT_INDEX_FILE at a temporary index.

    The index is created from `tree` (or empty when `tree` is None). On exit
    the previous GIT_INDEX_FILE setting is restored and the temporary file is
    deleted."""
    temp_index = create_temporary_index(tree)
    saved = os.environ.get('GIT_INDEX_FILE')
    os.environ['GIT_INDEX_FILE'] = temp_index
    try:
        yield
    finally:
        if saved is not None:
            os.environ['GIT_INDEX_FILE'] = saved
        else:
            del os.environ['GIT_INDEX_FILE']
        os.remove(temp_index)
402
403
def create_temporary_index(tree=None):
    """Write a temporary index file inside the git directory; return its path.

    The index is populated from `tree` when one is given. Otherwise, an empty
    index is created."""
    git_dir = run('git', 'rev-parse', '--git-dir')
    index_path = os.path.join(git_dir, temp_index_basename)
    source = '--empty' if tree is None else tree
    run('git', 'read-tree', '--index-output=' + index_path, source)
    return index_path
415
416
def print_diff(old_tree, new_tree):
    """Show the differences between `old_tree` and `new_tree` on stdout."""
    # The porcelain 'diff' is used rather than the plumbing 'diff-tree'
    # because the output is meant to be read by the user, and only the former
    # does nice things like color and pagination.
    diff_cmd = ['git', 'diff', old_tree, new_tree, '--']
    subprocess.check_call(diff_cmd)
423
424
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails (exit status 2) if there are local changes in those files and not
    `force`. If `patch_mode`, runs `git checkout --patch` to select hunks
    interactively.

    Returns the list of file names that differ between `old_tree` and
    `new_tree`."""
    changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', old_tree,
                        new_tree).rstrip('\0').split('\0')
    if not force:
        # '--' keeps a file name that starts with '-' from being read as an
        # option.
        unstaged_files = run('git', 'diff-files', '--name-status', '--',
                             *changed_files)
        if unstaged_files:
            sys.stderr.write('The following files would be modified but '
                             'have unstaged changes:\n')
            sys.stderr.write('%s\n' % unstaged_files)
            sys.stderr.write('Please commit, stage, or stash them first.\n')
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new
        # tree and checkout from that, but then the user will be presented
        # with a message saying "Discard ... from worktree". Instead, we use
        # the old tree as the index and checkout from new_tree, which gives
        # the slightly better message, "Apply ... to index and worktree". This
        # is not quite right, since it won't be applied to the user's index,
        # but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(['git', 'checkout', '--patch', new_tree])
    else:
        with temporary_index_file(new_tree):
            run('git', 'checkout-index', '-a', '-f')
    return changed_files
454
455
def run(*args, **kwargs):
    """Run the command `args` and return what it wrote to stdout.

    Keyword arguments:
      stdin   -- data passed to the command's standard input (default '').
      verbose -- if true (the default), name the command on stderr when it
                 fails or when it writes to stderr.
      strip   -- if true (the default), strip trailing CR/LF characters from
                 the returned output.

    Whatever the command writes to stderr is relayed to stderr. If the
    command exits with a non-zero status, this process exits with status 2.
    Raises TypeError for any other keyword argument."""
    stdin = kwargs.pop('stdin', '')
    verbose = kwargs.pop('verbose', True)
    strip = kwargs.pop('strip', True)
    if kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'"
                        % next(iter(kwargs)))
    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    out, err = proc.communicate(input=stdin)
    if proc.returncode != 0:
        if verbose:
            sys.stderr.write('`%s` returned %s\n'
                             % (' '.join(args), proc.returncode))
        if err:
            sys.stderr.write('%s\n' % err.rstrip())
        sys.exit(2)
    if err:
        if verbose:
            sys.stderr.write('`%s` printed to stderr:\n' % ' '.join(args))
        sys.stderr.write('%s\n' % err.rstrip())
    return out.rstrip('\r\n') if strip else out
478
479
def die(message):
    """Write "error: <message>" to stderr and exit with status 2."""
    sys.stderr.write('error: %s\n' % (message,))
    raise SystemExit(2)
483
484
# Run the tool only when executed as a script, not when imported.
if __name__ == '__main__':
    # main() returns None, so this exits with status 0 unless main() itself
    # exits earlier.
    sys.exit(main())