# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.

This implementation uses the LD_PRELOAD or DYLD_INSERT_LIBRARIES
mechanisms provided by the dynamic linker. The related library is implemented
in C and can be found under the 'libear' directory.

The 'libear' library captures all child process creation and logs the
relevant information about it into separate files in a specified directory.
The parameter of this process is the output directory name, where the report
files shall be placed. This parameter is passed as an environment variable.

The module also implements compiler wrappers to intercept the compiler calls.

The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """

import sys
import os
import os.path
import re
import itertools
import json
import glob
import argparse
import logging
import subprocess
from libear import build_libear, TemporaryDirectory
from libscanbuild import command_entry_point, run_command
from libscanbuild import duplicate_check, tempdir, initialize_logging
from libscanbuild.compilation import split_command
from libscanbuild.shell import encode, decode

__all__ = ['capture', 'intercept_build_main', 'intercept_build_wrapper']

# ASCII separator control characters framing the exec report files:
# GS terminates one process record, RS separates the fields of a record,
# US terminates every argument of the recorded command.
GS = chr(0x1d)
RS = chr(0x1e)
US = chr(0x1f)

# File names of the compiler wrappers; they are looked up in 'bin_dir' and
# exported as CC / CXX when the wrapper based interception is used.
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
# sys.platform values for which library preload is never attempted.
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})


@command_entry_point
def intercept_build_main(bin_dir):
    """ Entry point for 'intercept-build' command.

    :param bin_dir: directory passed on to 'capture' (where the compiler
        wrappers are looked up),
    :return: 0 when no build command was given (only the help is printed),
        otherwise the value returned by 'capture'. """

    parser = create_parser()
    args = parser.parse_args()

    initialize_logging(args.verbose)
    logging.debug('Parsed arguments: %s', args)

    # without a build command there is nothing to intercept
    if not args.build:
        parser.print_help()
        return 0

    return capture(args, bin_dir)


def capture(args, bin_dir):
    """ The entry point of build command interception.

    Runs the build command in the interception environment, collects the
    exec reports written into a temporary directory and dumps the resulting
    compilation database into 'args.cdb'.

    :param args: parsed command line arguments (reads 'build', 'cdb' and
        the optional 'append' attribute),
    :param bin_dir: directory of the compiler wrappers,
    :return: exit code of the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-', dir=tempdir()) as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir, bin_dir)
        logging.debug('run build in environment: %s', environment)
        exit_code = subprocess.call(args.build, env=environment)
        logging.info('build finished with exit code: %d', exit_code)
        # read the intercepted exec calls; the glob results already carry
        # the 'tmp_dir' prefix, so they must not be joined with it again
        # (that would double the prefix for a relative 'tmp_dir')
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(os.path.join(tmp_dir, '*.cmd'))))
        # do post processing; materialize the entries before the output file
        # is opened, so a failure here does not truncate an existing database
        entries = list(post_processing(exec_traces))
        # dump the compilation database
        with open(args.cdb, 'w+') as handle:
            json.dump(entries, handle, sort_keys=True, indent=4)
        return exit_code


def setup_environment(args, destination, bin_dir):
    """ Sets up the environment for the build command.

    It only prepares the environment variables; running the build is left
    to the caller. The exec calls will be logged by the 'libear' preloaded
    library or by the 'wrapper' programs.

    :param args: parsed command line arguments (reads 'override_compiler',
        'verbose' and the optional 'cc' / 'cxx' attributes),
    :param destination: directory where the exec reports shall be written,
    :param bin_dir: directory where the compiler wrappers are located,
    :return: a copy of os.environ extended with the interception related
        variables. """

    c_compiler = args.cc if 'cc' in args else 'cc'
    cxx_compiler = args.cxx if 'cxx' in args else 'c++'

    # the preload library is only built when it is going to be used
    libear_path = None if args.override_compiler or is_preload_disabled(
        sys.platform) else build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update({
            'CC': os.path.join(bin_dir, COMPILER_WRAPPER_CC),
            'CXX': os.path.join(bin_dir, COMPILER_WRAPPER_CXX),
            'INTERCEPT_BUILD_CC': c_compiler,
            'INTERCEPT_BUILD_CXX': cxx_compiler,
            'INTERCEPT_BUILD_VERBOSE': 'DEBUG' if args.verbose > 2 else 'INFO'
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment


def intercept_build_wrapper(cplusplus):
    """ Entry point for `intercept-cc` and `intercept-c++` compiler wrappers.

    It does generate execution report into target directory. And execute
    the wrapped compilation with the real compiler. The parameters for
    report and execution are from environment variables.

    Those parameters which for 'libear' library can't have meaningful
    values are faked.

    :param cplusplus: true value selects the C++ compiler
        (INTERCEPT_BUILD_CXX), otherwise the C one (INTERCEPT_BUILD_CC),
    :return: exit code of the real compiler. """

    # initialize wrapper logging
    logging.basicConfig(format='intercept: %(levelname)s: %(message)s',
                        level=os.getenv('INTERCEPT_BUILD_VERBOSE', 'INFO'))
    # write report; a failure here is only logged, the compilation below
    # is executed regardless
    try:
        target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
        if not target_dir:
            raise UserWarning('exec report target directory not found')
        pid = str(os.getpid())
        target_file = os.path.join(target_dir, pid + '.cmd')
        logging.debug('writing exec report to: %s', target_file)
        with open(target_file, 'ab') as handler:
            working_dir = os.getcwd()
            command = US.join(sys.argv) + US
            # the parent pid is not known here, the own pid is written twice
            content = RS.join([pid, pid, 'wrapper', working_dir, command]) + GS
            handler.write(content.encode('utf-8'))
    except IOError:
        logging.exception('writing exec report failed')
    except UserWarning as warning:
        logging.warning(warning)
    # execute with real compiler
    compiler = os.getenv('INTERCEPT_BUILD_CXX', 'c++') if cplusplus \
        else os.getenv('INTERCEPT_BUILD_CC', 'cc')
    compilation = [compiler] + sys.argv[1:]
    logging.debug('execute compiler: %s', compilation)
    return subprocess.call(compilation)


def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename: path of the report file,
    :return: yields a dict with 'pid', 'ppid', 'function', 'directory' and
        'command' keys for every non-empty record of the file. """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as handler:
        content = handler.read()
    # the whole content is in memory, the file does not need to stay open
    # while the generator is suspended
    for group in filter(bool, content.split(GS)):
        records = group.split(RS)
        yield {
            'pid': records[0],
            'ppid': records[1],
            'function': records[2],
            'directory': records[3],
            # every argument is terminated by US, the split leaves an empty
            # trailing element which is dropped
            'command': records[4].split(US)[:-1]
        }


def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace: dict with 'command' and 'directory' keys (as yielded
        by 'parse_exec_trace'),
    :return: yields a dict with 'directory', 'command' and 'file' keys for
        each source file of the command; yields nothing when
        'split_command' returns a false value (presumably: the command is
        not a compiler call). """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        fullname = name if os.path.isabs(name) else os.path.join(cwd, name)
        return os.path.normpath(fullname)

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if compilation:
        # the compiler name is the same for every source of the command
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        for source in compilation.files:
            command = [compiler, '-c'] + compilation.flags + [source]
            logging.debug('formated as: %s', command)
            yield {
                'directory': exec_trace['directory'],
                'command': encode(command),
                'file': abspath(exec_trace['directory'], source)
            }


def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    elif platform == 'darwin':
        command = ['csrutil', 'status']
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        try:
            return any(pattern.match(line) for line in run_command(command))
        except Exception:
            # best effort: when 'csrutil' can not be executed the preload is
            # assumed to work. (KeyboardInterrupt and SystemExit are not
            # swallowed, unlike with a bare 'except'.)
            return False
    else:
        return False


def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dict with 'file', 'directory' and 'command' keys,
    :return: string built from the three fields, joined by '<>'. """

    # For faster lookup in set filename is reversed
    filename = entry['file'][::-1]
    # For faster lookup in set directory is reversed
    directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    command = ' '.join(decode(entry['command'])[1:])

    return '<>'.join([filename, directory, command])


def create_parser():
    """ Command line argument parser factory method.

    :return: an argparse.ArgumentParser which understands the '--verbose',
        '--cdb', '--append', '--override-compiler', '--use-cc' and
        '--use-c++' flags; every remaining argument is collected into the
        'build' attribute as the build command. """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '--verbose', '-v',
        action='count',
        default=0,
        help="""Enable verbose output from '%(prog)s'. A second and third
                flag increases verbosity.""")
    parser.add_argument(
        '--cdb',
        metavar='<file>',
        default="compile_commands.json",
        help="""The JSON compilation database.""")
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--append',
        action='store_true',
        help="""Append new entries to existing compilation database.""")

    advanced = parser.add_argument_group('advanced options')
    advanced.add_argument(
        '--override-compiler',
        action='store_true',
        help="""Always resort to the compiler wrapper even when better
                intercept methods are available.""")
    advanced.add_argument(
        '--use-cc',
        metavar='<path>',
        dest='cc',
        default='cc',
        help="""When '%(prog)s' analyzes a project by interposing a compiler
                wrapper, which executes a real compiler for compilation and
                do other tasks (record the compiler invocation). Because of
                this interposing, '%(prog)s' does not know what compiler your
                project normally uses. Instead, it simply overrides the CC
                environment variable, and guesses your default compiler.

                If you need '%(prog)s' to use a specific compiler for
                *compilation* then you can use this option to specify a path
                to that compiler.""")
    advanced.add_argument(
        '--use-c++',
        metavar='<path>',
        dest='cxx',
        default='c++',
        help="""This is the same as "--use-cc" but for C++ code.""")

    # everything after the recognized flags is the build command itself
    parser.add_argument(
        dest='build',
        nargs=argparse.REMAINDER,
        help="""Command to run.""")

    return parser
327 return parser