blob: 2d1e825d4495df3de50a48195b9984b94b557d85 [file] [log] [blame]
Laszlo Nagybc687582016-01-12 22:38:41 +00001# -*- coding: utf-8 -*-
2# The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.
8
9This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
10mechanisms provided by the dynamic linker. The related library is implemented
11in C language and can be found under 'libear' directory.
12
13The 'libear' library is capturing all child process creation and logging the
14relevant information about it into separate files in a specified directory.
15The parameter of this process is the output directory name, where the report
16files shall be placed. This parameter is passed as an environment variable.
17
18The module also implements compiler wrappers to intercept the compiler calls.
19
The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """
22
23import sys
24import os
25import os.path
26import re
27import itertools
28import json
29import glob
30import argparse
31import logging
Laszlo Nagybc687582016-01-12 22:38:41 +000032from libear import build_libear, TemporaryDirectory
Laszlo Nagy2e9c9222017-03-04 01:08:05 +000033from libscanbuild import command_entry_point, compiler_wrapper, \
34 wrapper_environment, run_command, run_build, reconfigure_logging
35from libscanbuild import duplicate_check, tempdir
Laszlo Nagy8bd63e52016-04-19 12:03:03 +000036from libscanbuild.compilation import split_command
Laszlo Nagybc687582016-01-12 22:38:41 +000037from libscanbuild.shell import encode, decode
38
# Public names of this module: the only ones a star-import will expose.
__all__ = ['capture', 'intercept_build_main', 'intercept_compiler_wrapper']
Laszlo Nagybc687582016-01-12 22:38:41 +000040
# ASCII separator characters used by the execution trace files:
# GS terminates one execution record, RS separates the fields of a record
# and US terminates every argument of the recorded command.
GS = '\x1d'
RS = '\x1e'
US = '\x1f'

# Names of the compiler wrapper executables looked up in the 'bin' directory.
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
# Platforms (as reported by sys.platform) where library preload is never used.
WRAPPER_ONLY_PLATFORMS = frozenset(('win32', 'cygwin'))
Laszlo Nagybc687582016-01-12 22:38:41 +000049
50
@command_entry_point
def intercept_build_main(bin_dir):
    """ Entry point for 'intercept-build' command.

    Parses the command line, adjusts the logging verbosity and hands the
    work over to 'capture'. When no build command was given it only prints
    the usage and reports success.

    :param bin_dir: directory forwarded to 'capture',
    :return:        0 when only the help was printed, otherwise whatever
                    'capture' returns. """

    parser = create_parser()
    args = parser.parse_args()

    reconfigure_logging(args.verbose)
    logging.debug('Raw arguments %s', sys.argv)

    if args.build:
        return capture(args, bin_dir)

    # nothing to run: show the usage instead
    parser.print_help()
    return 0
66
67
def capture(args, bin_dir):
    """ The entry point of build command interception.

    Runs the build command with the interception environment, collects the
    execution traces written into a temporary directory and writes the
    resulting compilation database into 'args.cdb'.

    :param args:    the parsed command line arguments ('build' and 'cdb' are
                    read here, 'append' is honoured when present),
    :param bin_dir: directory forwarded to 'setup_environment',
    :return:        the value returned by 'run_build' for the build command.
    """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-', dir=tempdir()) as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir, bin_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; the glob already yields paths
        # prefixed with the directory, so they are used as they are
        trace_pattern = os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(trace_pattern)))
        # do post processing; the lazy pipeline is consumed completely
        # *before* the output file is opened, so a failure while reading the
        # traces can not leave a truncated (or emptied) database behind
        entries = list(post_processing(exec_traces))
        # dump the compilation database
        with open(args.cdb, 'w') as handle:
            json.dump(entries, handle, sort_keys=True, indent=4)
        return exit_code
110
111
def setup_environment(args, destination, bin_dir):
    """ Sets up the environment for the build command.

    It only prepares the environment variables; running the build command
    is up to the caller. With this environment the exec calls will be
    logged by the 'libear' preloaded library or by the 'wrapper' programs.

    :param args:        the parsed command line arguments,
    :param destination: directory where the execution reports shall be
                        written (also used to build 'libear' into),
    :param bin_dir:     directory which contains the compiler wrappers,
    :return:            a copy of the current process environment extended
                        with the interception related variables. """

    c_compiler = args.cc if 'cc' in args else 'cc'

    # the preload library is only built when it is going to be used
    wrappers_only = args.override_compiler or is_preload_disabled(
        sys.platform)
    libear_path = None if wrappers_only else build_libear(
        c_compiler, destination)

    environment = dict(os.environ)
    environment['INTERCEPT_BUILD_TARGET_DIR'] = destination

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment.update({
            'CC': os.path.join(bin_dir, COMPILER_WRAPPER_CC),
            'CXX': os.path.join(bin_dir, COMPILER_WRAPPER_CXX)
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment
146
147
@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`.

    Delegates to 'compiler_wrapper' with the interception specific
    implementation and returns its result unchanged. """

    result = compiler_wrapper(intercept_compiler_wrapper_impl)
    return result
Laszlo Nagybc687582016-01-12 22:38:41 +0000153
Laszlo Nagybc687582016-01-12 22:38:41 +0000154
def intercept_compiler_wrapper_impl(_, execution):
    """ Implement intercept compiler wrapper functionality.

    Writes an execution report about the current compiler call into the
    directory named by the INTERCEPT_BUILD_TARGET_DIR environment variable.
    Problems are only logged as warnings.

    :param _:         ignored,
    :param execution: the execution which is handed to 'write_exec_trace'.
    """

    warning_format = 'execution report might be incomplete: %s'

    report_dir = os.environ.get('INTERCEPT_BUILD_TARGET_DIR')
    if report_dir:
        # write current execution info to the pid file
        try:
            report_name = '{0}{1}'.format(os.getpid(), TRACE_FILE_EXTENSION)
            report_path = os.path.join(report_dir, report_name)
            logging.debug('writing execution report to: %s', report_path)
            write_exec_trace(report_path, execution)
        except IOError:
            logging.warning(warning_format, 'io problem')
    else:
        logging.warning(warning_format, 'missing target directory')
175
176
def write_exec_trace(filename, entry):
    """ Write execution report file.

    This method shall be sync with the execution report writer in interception
    library. A report is a sequence of fields (pid, parent pid, function
    name, working directory, command) separated by RS and closed by GS;
    every argument of the command is followed by US. The wrapper has no
    parent pid information, so the pid is written twice.

    :param filename:    path to the output execution trace file,
    :param entry:       the Execution object to append to that file. """

    with open(filename, 'ab') as trace_file:
        process_id = str(entry.pid)
        arguments = US.join(entry.cmd) + US
        fields = (process_id, process_id, 'wrapper', entry.cwd, arguments)
        record = RS.join(fields) + GS
        trace_file.write(record.encode('utf-8'))
Laszlo Nagybc687582016-01-12 22:38:41 +0000191
192
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    The file is decoded as UTF-8, which is the encoding 'write_exec_trace'
    uses, instead of relying on the locale of the current process. Reports
    with missing fields (eg.: a partially written file) are skipped with a
    warning.

    :param filename: path to the execution trace file,
    :return:         generator of dictionaries with the 'pid', 'ppid',
                     'function', 'directory' and 'command' keys. """

    logging.debug('parse exec trace file: %s', filename)
    # read everything up front, so the file is not kept open while the
    # generator is suspended
    with open(filename, 'rb') as handler:
        content = handler.read().decode('utf-8')

    for group in filter(bool, content.split(GS)):
        records = group.split(RS)
        if len(records) < 5:
            logging.warning('skipping malformed exec trace in: %s', filename)
            continue
        yield {
            'pid': records[0],
            'ppid': records[1],
            'function': records[2],
            'directory': records[3],
            # every argument is terminated by US: drop the empty tail
            'command': records[4].split(US)[:-1]
        }
212
213
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    Yields one entry per source file of the execution, when the executed
    command is recognised by 'split_command'. Yields nothing otherwise.

    :param exec_trace: dictionary with 'command' and 'directory' keys,
    :return:           generator of dictionaries with 'directory', 'command'
                       and 'file' keys. """

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        if compilation.compiler == 'c++':
            driver = 'c++'
        else:
            driver = 'cc'
        command = [driver, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)

        # create normalized absolute path from the source file name
        working_dir = exec_trace['directory']
        if os.path.isabs(source):
            source_path = source
        else:
            source_path = os.path.join(working_dir, source)

        yield {
            'directory': exec_trace['directory'],
            'command': encode(command),
            'file': os.path.normpath(source_path)
        }
234
235
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    if platform != 'darwin':
        return False

    command = ['csrutil', 'status']
    pattern = re.compile(r'System Integrity Protection status:\s+enabled')
    try:
        return any(pattern.match(line) for line in run_command(command))
    except Exception:
        # Best effort: when the status can not be queried (eg.: 'csrutil'
        # is missing) the preload is assumed to work. Only ordinary errors
        # are swallowed, KeyboardInterrupt and SystemExit shall propagate.
        logging.debug('could not query SIP status', exc_info=True)
        return False
257
Laszlo Nagybc687582016-01-12 22:38:41 +0000258
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dictionary with 'file', 'directory' and 'command' keys,
    :return:      string built from the reversed file name, the reversed
                  directory and the command without its first word. """

    # File name and directory are stored reverted, which is meant to make
    # the lookup in a set faster.
    reverted_file = entry['file'][::-1]
    reverted_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To avoid this the hash does
    # not contain the first word of the command.
    arguments = decode(entry['command'])
    arguments_only = ' '.join(arguments[1:])

    return '<>'.join((reverted_file, reverted_directory, arguments_only))
273
274
def create_parser():
    """ Command line argument parser factory method.

    :return: an 'argparse.ArgumentParser' which understands the verbosity,
             the compilation database and the compiler override flags,
             and collects the rest of the command line as the build
             command. """

    result = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # general flags
    result.add_argument(
        '--verbose', '-v', action='count', default=0,
        help="""Enable verbose output from '%(prog)s'. A second and third
                flag increases verbosity.""")
    result.add_argument(
        '--cdb', metavar='<file>', default="compile_commands.json",
        help="""The JSON compilation database.""")
    exclusive = result.add_mutually_exclusive_group()
    exclusive.add_argument(
        '--append', action='store_true',
        help="""Append new entries to existing compilation database.""")

    # flags which control the compiler wrappers
    advanced_group = result.add_argument_group('advanced options')
    advanced_group.add_argument(
        '--override-compiler', action='store_true',
        help="""Always resort to the compiler wrapper even when better
                intercept methods are available.""")
    advanced_group.add_argument(
        '--use-cc', metavar='<path>', dest='cc', default='cc',
        help="""When '%(prog)s' analyzes a project by interposing a compiler
                wrapper, which executes a real compiler for compilation and
                do other tasks (record the compiler invocation). Because of
                this interposing, '%(prog)s' does not know what compiler your
                project normally uses. Instead, it simply overrides the CC
                environment variable, and guesses your default compiler.

                If you need '%(prog)s' to use a specific compiler for
                *compilation* then you can use this option to specify a path
                to that compiler.""")
    advanced_group.add_argument(
        '--use-c++', metavar='<path>', dest='cxx', default='c++',
        help="""This is the same as "--use-cc" but for C++ code.""")

    # everything else on the command line is the build command
    result.add_argument(
        dest='build', nargs=argparse.REMAINDER,
        help="""Command to run.""")

    return result
331 return parser