blob: 2aef79fe581ca90658f5d967f5890ebcf39aab71 [file] [log] [blame]
Laszlo Nagybc687582016-01-12 22:38:41 +00001# -*- coding: utf-8 -*-
2# The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
6""" This module is responsible to capture the compiler invocation of any
7build process. The result of that should be a compilation database.
8
9This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
10mechanisms provided by the dynamic linker. The related library is implemented
11in C language and can be found under 'libear' directory.
12
13The 'libear' library is capturing all child process creation and logging the
14relevant information about it into separate files in a specified directory.
15The parameter of this process is the output directory name, where the report
16files shall be placed. This parameter is passed as an environment variable.
17
18The module also implements compiler wrappers to intercept the compiler calls.
19
The module implements the build command execution and the post-processing of
the output files, which will be condensed into a compilation database. """
22
23import sys
24import os
25import os.path
26import re
27import itertools
28import json
29import glob
30import argparse
31import logging
32import subprocess
33from libear import build_libear, TemporaryDirectory
Laszlo Nagy46fc18a2017-01-28 22:48:26 +000034from libscanbuild import command_entry_point, run_command
Laszlo Nagy8bd63e52016-04-19 12:03:03 +000035from libscanbuild import duplicate_check, tempdir, initialize_logging
36from libscanbuild.compilation import split_command
Laszlo Nagybc687582016-01-12 22:38:41 +000037from libscanbuild.shell import encode, decode
38
# Names exported by 'from <this module> import *'.
__all__ = ['capture', 'intercept_build_main', 'intercept_build_wrapper']
40
# Separator characters of the exec report format used in this module:
# GS terminates one exec report, RS separates the fields of a report and
# US terminates every argument of the reported command.
GS = '\x1d'
RS = '\x1e'
US = '\x1f'

# File names of the compiler wrapper executables (looked up in 'bin_dir').
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
# Platforms for which 'is_preload_disabled' always answers True.
WRAPPER_ONLY_PLATFORMS = frozenset(('win32', 'cygwin'))
Laszlo Nagybc687582016-01-12 22:38:41 +000048
49
@command_entry_point
def intercept_build_main(bin_dir):
    """ Entry point for 'intercept-build' command.

    Parses the command line, initializes logging from the verbosity level
    and hands the parsed arguments over to 'capture'. When no build command
    was given, only the help text is printed.

    :param bin_dir: passed through to 'capture' unchanged,
    :return: 0 when only the help was printed, otherwise the value
             returned by 'capture'. """

    arg_parser = create_parser()
    parsed = arg_parser.parse_args()

    initialize_logging(parsed.verbose)
    logging.debug('Parsed arguments: %s', parsed)

    if parsed.build:
        return capture(parsed, bin_dir)

    # nothing to run: show the usage instead
    arg_parser.print_help()
    return 0
65
66
def capture(args, bin_dir):
    """ The entry point of build command interception.

    Runs the build command in an environment prepared by
    'setup_environment', collects the '*.cmd' exec reports written into a
    temporary directory during the build and writes the result as JSON
    into 'args.cdb'.

    :param args: the parsed command line arguments; 'build' and 'cdb' are
                 read directly, 'append' and 'raw_entries' are optional,
    :param bin_dir: passed to 'setup_environment' (directory of the
                    compiler wrappers),
    :return: the exit code of the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements.

        :param commands: iterable of exec traces (see 'parse_exec_trace'),
        :return: lazy generator of compilation database entries. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        # NOTE(review): 'duplicate' is presumably a stateful predicate that
        # is true for entries whose 'entry_hash' was already seen; confirm
        # against libscanbuild.duplicate_check.
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-', dir=tempdir()) as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir, bin_dir)
        logging.debug('run build in environment: %s', environment)
        exit_code = subprocess.call(args.build, env=environment)
        logging.info('build finished with exit code: %d', exit_code)
        # read the intercepted exec calls
        # 'glob' already yields names prefixed with 'tmp_dir'; joining the
        # directory a second time would produce a wrong path whenever
        # 'tmp_dir' is relative, so the globbed name is used as it is.
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(os.path.join(tmp_dir, '*.cmd'))))
        # do post processing only if that was requested
        if 'raw_entries' not in args or not args.raw_entries:
            entries = post_processing(exec_traces)
        else:
            entries = exec_traces
        # dump the compilation database
        # The entries are lazy generators reading the report files, so they
        # are materialized here, while still inside the 'with' block of the
        # temporary directory.
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
114
115
def setup_environment(args, destination, bin_dir):
    """ Build the environment for the build command.

    The result is a copy of the current process environment, extended with
    the variables needed either by the preloaded 'libear' library or by the
    compiler 'wrapper' programs to log the exec calls.

    :param args: the parsed command line arguments; 'override_compiler' is
                 always read, 'verbose' only in wrapper mode, 'cc' and
                 'cxx' are optional,
    :param destination: directory where the exec reports shall be written,
    :param bin_dir: directory of the compiler wrapper executables,
    :return: the environment as a dictionary. """

    if 'cc' in args:
        c_compiler = args.cc
    else:
        c_compiler = 'cc'
    if 'cxx' in args:
        cxx_compiler = args.cxx
    else:
        cxx_compiler = 'c++'

    # the library is built only when preloading is wanted and possible
    wrappers_only = args.override_compiler or \
        is_preload_disabled(sys.platform)
    if wrappers_only:
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment['INTERCEPT_BUILD_TARGET_DIR'] = destination

    if libear_path and sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment['DYLD_INSERT_LIBRARIES'] = libear_path
        environment['DYLD_FORCE_FLAT_NAMESPACE'] = '1'
    elif libear_path:
        logging.debug('intercept gonna preload libear on UNIX')
        environment['LD_PRELOAD'] = libear_path
    else:
        logging.debug('intercept gonna use compiler wrappers')
        wrapper_level = 'DEBUG' if args.verbose > 2 else 'INFO'
        environment['CC'] = os.path.join(bin_dir, COMPILER_WRAPPER_CC)
        environment['CXX'] = os.path.join(bin_dir, COMPILER_WRAPPER_CXX)
        environment['INTERCEPT_BUILD_CC'] = c_compiler
        environment['INTERCEPT_BUILD_CXX'] = cxx_compiler
        environment['INTERCEPT_BUILD_VERBOSE'] = wrapper_level

    return environment
152
153
def intercept_build_wrapper(cplusplus):
    """ Entry point for `intercept-cc` and `intercept-c++` compiler wrappers.

    Appends an execution report about this very process to a file in the
    target directory, then runs the real compiler with the arguments this
    wrapper was called with. Target directory, log level and the real
    compilers are all taken from environment variables.

    Report fields which only the 'libear' library could fill with
    meaningful values are faked (the parent pid is the own pid, the
    function name is 'wrapper').

    :param cplusplus: true value selects the C++ compiler, otherwise the
                      C compiler is executed,
    :return: the exit code of the real compiler. """

    # initialize wrapper logging
    logging.basicConfig(
        level=os.getenv('INTERCEPT_BUILD_VERBOSE', 'INFO'),
        format='intercept: %(levelname)s: %(message)s')
    # write report; a failure here must not prevent the compilation
    try:
        report_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
        if not report_dir:
            raise UserWarning('exec report target directory not found')
        own_pid = str(os.getpid())
        report_file = os.path.join(report_dir, own_pid + '.cmd')
        logging.debug('writing exec report to: %s', report_file)
        with open(report_file, 'ab') as handler:
            fields = [own_pid, own_pid, 'wrapper', os.getcwd(),
                      US.join(sys.argv) + US]
            report = RS.join(fields) + GS
            handler.write(report.encode('utf-8'))
    except IOError:
        logging.exception('writing exec report failed')
    except UserWarning as warning:
        logging.warning(warning)
    # execute with real compiler
    if cplusplus:
        real_compiler = os.getenv('INTERCEPT_BUILD_CXX', 'c++')
    else:
        real_compiler = os.getenv('INTERCEPT_BUILD_CC', 'cc')
    invocation = [real_compiler] + sys.argv[1:]
    logging.debug('execute compiler: %s', invocation)
    return subprocess.call(invocation)
190
191
def parse_exec_trace(filename):
    """ Parse one exec report file.

    The file is expected to be written by the interception library or by
    the wrapper command. Reports inside the file are terminated by GS, the
    fields of a report are separated by RS and every argument of the
    command is terminated by US. A single file _might_ hold more than one
    report.

    :param filename: path of the report file,
    :return: generator of dictionaries with the keys 'pid', 'ppid',
             'function', 'directory' and 'command' (list of arguments). """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as handler:
        content = handler.read()
        for group in content.split(GS):
            # the terminating GS leaves an empty chunk behind
            if not group:
                continue
            records = group.split(RS)
            # every argument ends with US, so the last piece is dropped
            arguments = records[4].split(US)[:-1]
            yield {
                'pid': records[0],
                'ppid': records[1],
                'function': records[2],
                'directory': records[3],
                'command': arguments
            }
211
212
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace: dictionary with at least the keys 'command' and
                       'directory' (see 'parse_exec_trace'),
    :return: generator of entries, one for each source file of the command;
             nothing is generated when 'split_command' gives a false
             value for the command. """

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        if compilation.compiler == 'c++':
            compiler = 'c++'
        else:
            compiler = 'cc'
        command = [compiler, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)
        working_dir = exec_trace['directory']
        yield {
            'directory': working_dir,
            'command': encode(command),
            # normalized absolute path; relative names are resolved
            # against the working directory of the command
            'file': os.path.normpath(
                source if os.path.isabs(source)
                else os.path.join(working_dir, source))
        }
233
234
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    elif platform == 'darwin':
        command = ['csrutil', 'status']
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        try:
            return any(pattern.match(line) for line in run_command(command))
        except Exception:
            # Best effort: when 'csrutil' can not be executed the preload
            # is assumed to work. Only ordinary errors are swallowed, so
            # KeyboardInterrupt and SystemExit still propagate.
            return False
    else:
        return False
256
Laszlo Nagybc687582016-01-12 22:38:41 +0000257
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dictionary with the keys 'file', 'directory' and
                  'command',
    :return: string built from the reversed file name, the reversed
             directory and the command without its first word. """

    # For faster lookup in set the file name and the directory are reverted
    reversed_file = entry['file'][::-1]
    reversed_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To avoid this the hash does
    # not contain the first word of the command.
    arguments = decode(entry['command'])[1:]

    parts = (reversed_file, reversed_directory, ' '.join(arguments))
    return '<>'.join(parts)
272
273
def create_parser():
    """ Command line argument parser factory method.

    :return: an 'argparse.ArgumentParser' which accepts the general flags
             (--verbose, --cdb), the mutually exclusive --append and
             --disable-filter flags, the advanced compiler options and
             collects the rest of the command line as the build command. """

    result = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general flags
    result.add_argument(
        '--verbose', '-v',
        default=0,
        action='count',
        help="""Enable verbose output from '%(prog)s'. A second and third
        flag increases verbosity.""")
    result.add_argument(
        '--cdb',
        default="compile_commands.json",
        metavar='<file>',
        help="""The JSON compilation database.""")

    # the raw output is not a compilation database, appending to it
    # makes no sense
    exclusive = result.add_mutually_exclusive_group()
    exclusive.add_argument(
        '--append',
        action='store_true',
        help="""Append new entries to existing compilation database.""")
    exclusive.add_argument(
        '--disable-filter', '-n',
        action='store_true',
        dest='raw_entries',
        help="""Intercepted child process creation calls (exec calls) are all
        logged to the output. The output is not a compilation database.
        This flag is for debug purposes.""")

    # compiler related flags
    compiler_options = result.add_argument_group('advanced options')
    compiler_options.add_argument(
        '--override-compiler',
        action='store_true',
        help="""Always resort to the compiler wrapper even when better
        intercept methods are available.""")
    compiler_options.add_argument(
        '--use-cc',
        dest='cc',
        default='cc',
        metavar='<path>',
        help="""When '%(prog)s' analyzes a project by interposing a compiler
        wrapper, which executes a real compiler for compilation and
        do other tasks (record the compiler invocation). Because of
        this interposing, '%(prog)s' does not know what compiler your
        project normally uses. Instead, it simply overrides the CC
        environment variable, and guesses your default compiler.

        If you need '%(prog)s' to use a specific compiler for
        *compilation* then you can use this option to specify a path
        to that compiler.""")
    compiler_options.add_argument(
        '--use-c++',
        dest='cxx',
        default='c++',
        metavar='<path>',
        help="""This is the same as "--use-cc" but for C++ code.""")

    # everything else on the command line is the build command
    result.add_argument(
        dest='build',
        nargs=argparse.REMAINDER,
        help="""Command to run.""")

    return result
337 return parser