blob: 6a9f75349fb584c54ab0b3dfc8e5242ad9147fa5 [file] [log] [blame]
Laszlo Nagybc687582016-01-12 22:38:41 +00001# -*- coding: utf-8 -*-
2# The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
6""" This module is responsible to capture the compiler invocation of any
7build process. The result of that should be a compilation database.
8
9This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
10mechanisms provided by the dynamic linker. The related library is implemented
11in C language and can be found under 'libear' directory.
12
13The 'libear' library is capturing all child process creation and logging the
14relevant information about it into separate files in a specified directory.
15The parameter of this process is the output directory name, where the report
16files shall be placed. This parameter is passed as an environment variable.
17
18The module also implements compiler wrappers to intercept the compiler calls.
19
20The module implements the build command execution and the post-processing of
21the output files, which will be condensed into a compilation database. """
22
23import sys
24import os
25import os.path
26import re
27import itertools
28import json
29import glob
30import argparse
31import logging
32import subprocess
33from libear import build_libear, TemporaryDirectory
Laszlo Nagybc687582016-01-12 22:38:41 +000034from libscanbuild import command_entry_point
Laszlo Nagy8bd63e52016-04-19 12:03:03 +000035from libscanbuild import duplicate_check, tempdir, initialize_logging
36from libscanbuild.compilation import split_command
Laszlo Nagybc687582016-01-12 22:38:41 +000037from libscanbuild.shell import encode, decode
38
# Names exported by 'from <this module> import *'.
__all__ = ['capture', 'intercept_build_main', 'intercept_build_wrapper']
40
# ASCII control characters used as delimiters inside the exec report files:
#   GS (group separator, 0x1d)  terminates one complete exec report,
#   RS (record separator, 0x1e) separates the fields of a report,
#   US (unit separator, 0x1f)   terminates each argument of the command.
GS, RS, US = (chr(code) for code in (0x1d, 0x1e, 0x1f))

# File names of the compiler wrapper executables (looked up in 'bin_dir').
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
47
48
@command_entry_point
def intercept_build_main(bin_dir):
    """ Entry point for 'intercept-build' command.

    Parses the command line, initializes logging from the requested
    verbosity and hands over to 'capture'. When no build command was given
    the usage is printed and zero is returned. 'bin_dir' is forwarded to
    'capture' untouched. """

    arg_parser = create_parser()
    options = arg_parser.parse_args()

    initialize_logging(options.verbose)
    logging.debug('Parsed arguments: %s', options)

    if options.build:
        return capture(options, bin_dir)

    # nothing to run: show the usage instead
    arg_parser.print_help()
    return 0
64
65
def capture(args, bin_dir):
    """ The entry point of build command interception.

    Runs 'args.build' in an environment prepared by 'setup_environment',
    reads the exec reports ('*.cmd' files) written into a temporary
    directory, and dumps the result as JSON into 'args.cdb'.

    Unless 'args.raw_entries' is set, the reports are post-processed into
    compilation database entries. When 'args.append' is set and 'args.cdb'
    already exists, its entries are merged in front of the new ones.

    Returns the exit code of the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run (one exec trace might
        # produce zero, one or many entries)
        current = itertools.chain.from_iterable(
            format_entry(command) for command in commands)
        # read entries from previous run; this is loaded eagerly, before
        # the output file gets truncated for writing
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both, and drop entries whose
        # source file does not exist anymore
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-', dir=tempdir()) as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir, bin_dir)
        logging.debug('run build in environment: %s', environment)
        exit_code = subprocess.call(args.build, env=environment)
        logging.info('build finished with exit code: %d', exit_code)
        # read the intercepted exec calls; the glob results already carry
        # the 'tmp_dir' prefix, so they must not be joined with it again
        # (that would double the directory part for a relative 'tmp_dir')
        report_files = sorted(glob.iglob(os.path.join(tmp_dir, '*.cmd')))
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename) for filename in report_files)
        # do post processing only if that was requested
        if 'raw_entries' not in args or not args.raw_entries:
            entries = post_processing(exec_traces)
        else:
            entries = exec_traces
        # dump the compilation database; the lazy 'entries' has to be
        # consumed here, while the report files still exist
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
113
114
def setup_environment(args, destination, bin_dir):
    """ Create the environment mapping for the build command.

    The result is a copy of the current process environment, extended with
    the variables needed by the chosen interception method. 'destination'
    is the directory where the exec reports shall be written.

    The 'libear' library is preloaded when it can be used, otherwise (or
    when 'args.override_compiler' is set) the CC and CXX variables are
    pointed to the compiler wrappers found in 'bin_dir'. """

    c_compiler = args.cc if 'cc' in args else 'cc'
    cxx_compiler = args.cxx if 'cxx' in args else 'c++'

    # the library is built only if preloading is wanted and possible
    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment['INTERCEPT_BUILD_TARGET_DIR'] = destination

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        extension = {
            'CC': os.path.join(bin_dir, COMPILER_WRAPPER_CC),
            'CXX': os.path.join(bin_dir, COMPILER_WRAPPER_CXX),
            'INTERCEPT_BUILD_CC': c_compiler,
            'INTERCEPT_BUILD_CXX': cxx_compiler,
            'INTERCEPT_BUILD_VERBOSE': 'DEBUG' if args.verbose > 2 else 'INFO'
        }
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        extension = {
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        }
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        extension = {'LD_PRELOAD': libear_path}

    environment.update(extension)
    return environment
151
152
def intercept_build_wrapper(cplusplus):
    """ Entry point for `intercept-cc` and `intercept-c++` compiler wrappers.

    First it appends an exec report about this very process to a file in
    the directory named by INTERCEPT_BUILD_TARGET_DIR. A failure to write
    the report is only logged. Then it runs the real compiler (taken from
    INTERCEPT_BUILD_CXX when 'cplusplus' is true, from INTERCEPT_BUILD_CC
    otherwise) with the arguments of the wrapper, and returns its exit code.

    The report fields which only the 'libear' library could fill properly
    are faked: the parent pid is the own pid, the function is 'wrapper'. """

    # initialize wrapper logging
    logging.basicConfig(format='intercept: %(levelname)s: %(message)s',
                        level=os.getenv('INTERCEPT_BUILD_VERBOSE', 'INFO'))
    # write report
    try:
        report_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
        if not report_dir:
            raise UserWarning('exec report target directory not found')
        own_pid = str(os.getpid())
        report_file = os.path.join(report_dir, own_pid + '.cmd')
        logging.debug('writing exec report to: %s', report_file)
        with open(report_file, 'ab') as handler:
            fields = [
                own_pid,                 # pid
                own_pid,                 # ppid (faked)
                'wrapper',               # function (faked)
                os.getcwd(),             # working directory
                US.join(sys.argv) + US   # command, each argument US closed
            ]
            report = RS.join(fields) + GS
            handler.write(report.encode('utf-8'))
    except IOError:
        logging.exception('writing exec report failed')
    except UserWarning as warning:
        logging.warning(warning)
    # execute with real compiler
    if cplusplus:
        compiler = os.getenv('INTERCEPT_BUILD_CXX', 'c++')
    else:
        compiler = os.getenv('INTERCEPT_BUILD_CC', 'cc')
    compilation = [compiler] + sys.argv[1:]
    logging.debug('execute compiler: %s', compilation)
    return subprocess.call(compilation)
189
190
def parse_exec_trace(filename):
    """ Parse an exec report file into a sequence of dictionaries.

    The file content is a list of GS terminated reports. A report is a
    list of RS separated fields: pid, ppid, function, directory, command.
    The command itself is a list of US terminated arguments.

    Yields one dictionary per report; a single file _might_ contain more
    than one report. """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as handler:
        content = handler.read()
    for report in content.split(GS):
        # splitting leaves an empty string behind the last separator
        if not report:
            continue
        fields = report.split(RS)
        yield {
            'pid': fields[0],
            'ppid': fields[1],
            'function': fields[2],
            'directory': fields[3],
            # every argument is US terminated: drop the empty tail
            'command': fields[4].split(US)[:-1]
        }
210
211
def format_entry(exec_trace):
    """ Turn an exec trace into compilation database entries.

    The command of the trace is handed to 'split_command'. When that
    recognizes a compilation, one entry is yielded for every source file
    of it, with the keys 'directory', 'command' and 'file'. Otherwise
    nothing is yielded. """

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        command = [compiler, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)

        working_dir = exec_trace['directory']
        shell_command = encode(command)
        # make the source file name absolute (relative to the working
        # directory of the command) and normalized
        cwd = exec_trace['directory']
        if os.path.isabs(source):
            location = source
        else:
            location = os.path.join(cwd, source)
        yield {
            'directory': working_dir,
            'command': shell_command,
            'file': os.path.normpath(location)
        }
232
233
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    Same problem on linux when SELinux is enabled. The status query program
    'sestatus' and the output when it's enabled 'SELinux status: enabled'.

    'platform' is a value like 'sys.platform'. Returns True only when the
    status program ran and reported the protection as enabled; False on
    any other platform, or when the query could not be executed. """

    if platform == 'darwin':
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        command = ['csrutil', 'status']
    elif platform in {'linux', 'linux2'}:
        pattern = re.compile(r'SELinux status:\s+enabled')
        command = ['sestatus']
    else:
        return False

    try:
        lines = subprocess.check_output(command).decode('utf-8')
        return any(pattern.match(line) for line in lines.splitlines())
    except Exception:
        # Best effort: a missing status program, a non-zero exit or
        # undecodable output all mean "can not tell", which is treated as
        # not disabled. KeyboardInterrupt and SystemExit are not caught
        # here, so the user can still abort.
        return False
258
259
def entry_hash(entry):
    """ Create the string which identifies a compilation database entry.

    Two entries with the same string are considered to be duplicates. """

    # The file and directory names are stored reversed, which is meant to
    # make the lookup in a set faster.
    reversed_file = entry['file'][::-1]
    reversed_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To treat those as one, the
    # first word of the command is left out.
    arguments = decode(entry['command'])[1:]

    return '<>'.join((reversed_file, reversed_directory, ' '.join(arguments)))
274
275
def create_parser():
    """ Build the command line parser of the 'intercept-build' command.

    Besides the options, everything that remains on the command line is
    collected into 'build' as the command to run. """

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    cli.add_argument(
        '--verbose', '-v', action='count', default=0,
        help="""Enable verbose output from '%(prog)s'. A second and third
                flag increases verbosity.""")
    cli.add_argument(
        '--cdb', metavar='<file>', default="compile_commands.json",
        help="""The JSON compilation database.""")

    # appending and raw output can not be requested together
    output_mode = cli.add_mutually_exclusive_group()
    output_mode.add_argument(
        '--append', action='store_true',
        help="""Append new entries to existing compilation database.""")
    output_mode.add_argument(
        '--disable-filter', '-n', dest='raw_entries', action='store_true',
        help="""Intercepted child process creation calls (exec calls) are all
                logged to the output. The output is not a compilation database.
                This flag is for debug purposes.""")

    expert = cli.add_argument_group('advanced options')
    expert.add_argument(
        '--override-compiler', action='store_true',
        help="""Always resort to the compiler wrapper even when better
                intercept methods are available.""")
    expert.add_argument(
        '--use-cc', metavar='<path>', dest='cc', default='cc',
        help="""When '%(prog)s' analyzes a project by interposing a compiler
                wrapper, which executes a real compiler for compilation and
                do other tasks (record the compiler invocation). Because of
                this interposing, '%(prog)s' does not know what compiler your
                project normally uses. Instead, it simply overrides the CC
                environment variable, and guesses your default compiler.

                If you need '%(prog)s' to use a specific compiler for
                *compilation* then you can use this option to specify a path
                to that compiler.""")
    expert.add_argument(
        '--use-c++', metavar='<path>', dest='cxx', default='c++',
        help="""This is the same as "--use-cc" but for C++ code.""")

    # the build command is whatever remains on the command line
    cli.add_argument(
        dest='build', nargs=argparse.REMAINDER,
        help="""Command to run.""")

    return cli