blob: b3c55e0417d0b07b4939fb10dc5df357f1a2652f [file] [log] [blame]
Laszlo Nagybc687582016-01-12 22:38:41 +00001# -*- coding: utf-8 -*-
2# The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.
8
9This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
10mechanisms provided by the dynamic linker. The related library is implemented
11in C language and can be found under 'libear' directory.
12
13The 'libear' library is capturing all child process creation and logging the
14relevant information about it into separate files in a specified directory.
15The parameter of this process is the output directory name, where the report
16files shall be placed. This parameter is passed as an environment variable.
17
18The module also implements compiler wrappers to intercept the compiler calls.
19
The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """
22
23import sys
24import os
25import os.path
26import re
27import itertools
28import json
29import glob
30import argparse
31import logging
32import subprocess
33from libear import build_libear, TemporaryDirectory
Laszlo Nagy52c1d7e2017-02-14 10:30:50 +000034from libscanbuild import command_entry_point, run_build, run_command
Laszlo Nagy8bd63e52016-04-19 12:03:03 +000035from libscanbuild import duplicate_check, tempdir, initialize_logging
36from libscanbuild.compilation import split_command
Laszlo Nagybc687582016-01-12 22:38:41 +000037from libscanbuild.shell import encode, decode
38
39__all__ = ['capture', 'intercept_build_main', 'intercept_build_wrapper']
40
# ASCII separator characters which give structure to the exec report files:
# the fields of a report are joined with RS, every command argument is
# followed by US and every report is closed by GS.
GS = chr(0x1d)  # group separator
RS = chr(0x1e)  # record separator
US = chr(0x1f)  # unit separator

# File names of the compiler wrappers (joined onto the 'bin_dir' when the
# CC and CXX variables are set for the build).
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
# Platforms for which the library preload is reported as disabled, so the
# compiler wrappers are used instead.
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})
Laszlo Nagybc687582016-01-12 22:38:41 +000048
49
@command_entry_point
def intercept_build_main(bin_dir):
    """ Entry point for 'intercept-build' command.

    :param bin_dir: directory which is passed through to 'capture',
    :return: 0 when no build command was given (the help message is
             printed in that case), otherwise the result of 'capture'. """

    parser = create_parser()
    args = parser.parse_args()

    initialize_logging(args.verbose)
    logging.debug('Parsed arguments: %s', args)

    # without a build command there is nothing to intercept
    if not args.build:
        parser.print_help()
        return 0

    return capture(args, bin_dir)
65
66
def capture(args, bin_dir):
    """ The entry point of build command interception.

    Runs the build command with the interception environment, reads the
    exec reports which were written into a temporary directory and dumps
    the compilation database into the 'args.cdb' file.

    :param args: parsed command line arguments, the 'build' and 'cdb'
                 attributes are read (and 'append' when it is present),
    :param bin_dir: directory which is passed to 'setup_environment',
    :return: the value returned by 'run_build' for the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-', dir=tempdir()) as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir, bin_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; the glob pattern already contains
        # the directory, so the matched names are used as they are
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(os.path.join(tmp_dir, '*.cmd'))))
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database; the entries are generated lazily,
        # so they are materialized here, while the report files still exist
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
109
110
def setup_environment(args, destination, bin_dir):
    """ Sets up the environment for the build command.

    It only creates the environment variables, the build command is not
    executed here. With this environment the exec calls will be logged by
    the 'libear' preloaded library or by the 'wrapper' programs.

    :param args: parsed command line arguments, 'override_compiler' and
                 'verbose' are read ('cc' and 'cxx' when they are present),
    :param destination: directory name for the exec reports, it is also
                        given to 'build_libear',
    :param bin_dir: directory name which is joined with the compiler
                    wrapper names,
    :return: a copy of 'os.environ' extended with the variables of the
             selected interception method. """

    c_compiler = args.cc if 'cc' in args else 'cc'
    cxx_compiler = args.cxx if 'cxx' in args else 'c++'

    # the library is built only when the preload method is going to be used
    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update({
            'CC': os.path.join(bin_dir, COMPILER_WRAPPER_CC),
            'CXX': os.path.join(bin_dir, COMPILER_WRAPPER_CXX),
            'INTERCEPT_BUILD_CC': c_compiler,
            'INTERCEPT_BUILD_CXX': cxx_compiler,
            'INTERCEPT_BUILD_VERBOSE': 'DEBUG' if args.verbose > 2 else 'INFO'
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment
147
148
def intercept_build_wrapper(cplusplus):
    """ Entry point for `intercept-cc` and `intercept-c++` compiler wrappers.

    It does generate execution report into target directory. And execute
    the wrapped compilation with the real compiler. The parameters for
    report and execution are from environment variables.

    Those parameters which for 'libear' library can't have meaningful
    values are faked.

    :param cplusplus: True selects the C++ compiler, otherwise the C
                      compiler is executed,
    :return: the value returned by 'subprocess.call' for the compiler. """

    # initialize wrapper logging
    logging.basicConfig(format='intercept: %(levelname)s: %(message)s',
                        level=os.getenv('INTERCEPT_BUILD_VERBOSE', 'INFO'))
    # write report
    try:
        target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
        if not target_dir:
            raise UserWarning('exec report target directory not found')
        pid = str(os.getpid())
        target_file = os.path.join(target_dir, pid + '.cmd')
        logging.debug('writing exec report to: %s', target_file)
        with open(target_file, 'ab') as handler:
            working_dir = os.getcwd()
            command = US.join(sys.argv) + US
            # the parent pid is faked with the own pid and the function
            # name is the constant 'wrapper'
            content = RS.join([pid, pid, 'wrapper', working_dir, command]) + GS
            handler.write(content.encode('utf-8'))
    except (IOError, UnicodeError):
        # A report which can't be written or encoded shall not stop the
        # compilation, that is why the encoding errors are handled the same
        # way as the I/O errors.
        logging.exception('writing exec report failed')
    except UserWarning as warning:
        logging.warning(warning)
    # execute with real compiler
    compiler = os.getenv('INTERCEPT_BUILD_CXX', 'c++') if cplusplus \
        else os.getenv('INTERCEPT_BUILD_CC', 'cc')
    compilation = [compiler] + sys.argv[1:]
    logging.debug('execute compiler: %s', compilation)
    return subprocess.call(compilation)
185
186
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename: name of the report file,
    :return: generator of dictionaries with the 'pid', 'ppid', 'function',
             'directory' and 'command' keys, where 'command' is the list
             of the arguments. """

    logging.debug('parse exec trace file: %s', filename)
    # The compiler wrapper writes the report as UTF-8 encoded bytes, so the
    # content is decoded the same way instead of relying on the default
    # encoding of the platform.
    # NOTE(review): reports written by 'libear' are presumed to be UTF-8
    # compatible too, confirm against the library.
    with open(filename, 'rb') as handler:
        content = handler.read().decode('utf-8')
    # the file is closed before the first element is produced; empty groups
    # (like the one after the closing GS) are dropped
    for group in filter(bool, content.split(GS)):
        records = group.split(RS)
        yield {
            'pid': records[0],
            'ppid': records[1],
            'function': records[2],
            'directory': records[3],
            # every argument is followed by US, drop the empty last piece
            'command': records[4].split(US)[:-1]
        }
206
207
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace: dictionary with the 'command' and 'directory' keys,
    :return: generator of dictionaries with the 'directory', 'command' and
             'file' keys, one for each source file of the command. Nothing
             is generated when 'split_command' gives a false value. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        fullname = name if os.path.isabs(name) else os.path.join(cwd, name)
        return os.path.normpath(fullname)

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if compilation:
        # the compiler name is the same for every source file of the command
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        for source in compilation.files:
            command = [compiler, '-c'] + compilation.flags + [source]
            logging.debug('formated as: %s', command)
            yield {
                'directory': exec_trace['directory'],
                'command': encode(command),
                'file': abspath(exec_trace['directory'], source)
            }
228
229
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    elif platform == 'darwin':
        command = ['csrutil', 'status']
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        try:
            return any(pattern.match(line) for line in run_command(command))
        except Exception:
            # Best effort: when the status can't be queried the preload is
            # considered usable. Only ordinary errors are swallowed, a
            # KeyboardInterrupt or SystemExit is not hidden anymore.
            return False
    else:
        return False
251
Laszlo Nagybc687582016-01-12 22:38:41 +0000252
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dictionary with the 'file', 'directory' and 'command'
                  keys,
    :return: string which is built from the three fields of the entry. """

    # For faster lookup in set filename is reversed
    filename = entry['file'][::-1]
    # For faster lookup in set directory is reversed
    directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    command = ' '.join(decode(entry['command'])[1:])

    return '<>'.join([filename, directory, command])
267
268
def create_parser():
    """ Command line argument parser factory method.

    :return: an 'argparse.ArgumentParser' which produces the 'verbose',
             'cdb', 'append', 'override_compiler', 'cc', 'cxx' and 'build'
             attributes. """

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '--verbose', '-v',
        action='count',
        default=0,
        help="""Enable verbose output from '%(prog)s'. A second and third
                flag increases verbosity.""")
    parser.add_argument(
        '--cdb',
        metavar='<file>',
        default="compile_commands.json",
        help="""The JSON compilation database.""")
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '--append',
        action='store_true',
        help="""Append new entries to existing compilation database.""")

    advanced = parser.add_argument_group('advanced options')
    advanced.add_argument(
        '--override-compiler',
        action='store_true',
        help="""Always resort to the compiler wrapper even when better
                intercept methods are available.""")
    advanced.add_argument(
        '--use-cc',
        metavar='<path>',
        dest='cc',
        default='cc',
        help="""When '%(prog)s' analyzes a project by interposing a compiler
                wrapper, which executes a real compiler for compilation and
                do other tasks (record the compiler invocation). Because of
                this interposing, '%(prog)s' does not know what compiler your
                project normally uses. Instead, it simply overrides the CC
                environment variable, and guesses your default compiler.

                If you need '%(prog)s' to use a specific compiler for
                *compilation* then you can use this option to specify a path
                to that compiler.""")
    advanced.add_argument(
        '--use-c++',
        metavar='<path>',
        dest='cxx',
        default='c++',
        help="""This is the same as "--use-cc" but for C++ code.""")

    # everything after the options is taken as the build command
    parser.add_argument(
        dest='build',
        nargs=argparse.REMAINDER,
        help="""Command to run.""")

    return parser
325 return parser