# -*- coding: utf-8 -*-
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.

This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
mechanisms provided by the dynamic linker. The related library is implemented
in C and can be found under the 'libear' directory.

The 'libear' library is capturing all child process creation and logging the
relevant information about it into separate files in a specified directory.
The parameter of this process is the output directory name, where the report
files shall be placed. This parameter is passed as an environment variable.

The module also implements compiler wrappers to intercept the compiler calls.

The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """
22
import sys
import os
import os.path
import re
import io
import itertools
import json
import glob
import logging
from libear import build_libear, TemporaryDirectory
from libscanbuild import command_entry_point, compiler_wrapper, \
    wrapper_environment, run_command, run_build
from libscanbuild import duplicate_check
from libscanbuild.compilation import split_command
from libscanbuild.arguments import parse_args_for_intercept_build
from libscanbuild.shell import encode, decode
38
# names exported by 'from <this module> import *'
__all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper']
Laszlo Nagybc687582016-01-12 22:38:41 +000040
# Separator characters of the execution trace files: a group is closed by GS,
# the fields of a group are joined by RS, and every command argument is
# terminated by US.
GS = chr(0x1d)
RS = chr(0x1e)
US = chr(0x1f)

COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})
Laszlo Nagybc687582016-01-12 22:38:41 +000049
50
@command_entry_point
def intercept_build():
    """ Entry point for 'intercept-build' command.

    Parses the command line arguments and hands them over to 'capture'.

    :return: the value returned by 'capture'. """

    args = parse_args_for_intercept_build()
    return capture(args)
Laszlo Nagybc687582016-01-12 22:38:41 +000057
58
def capture(args):
    """ The entry point of build command interception.

    Runs the build command with the interception environment, reads the
    execution reports written during the build and dumps the resulting
    compilation database into the file named by 'args.cdb'.

    :param args: the parsed command line arguments; this function reads the
                 'build' and 'cdb' attributes and, when present, 'append',
    :return: the value returned by 'run_build' for the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; glob returns the names already
        # prefixed with the directory, so they must not be joined to it again
        trace_pattern = os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(trace_pattern)))
        # do post processing
        entries = post_processing(exec_traces)
        # dump the compilation database; the lazy entries are materialized
        # here, inside the 'with' block, because producing them reads the
        # trace files under tmp_dir
        with open(args.cdb, 'w+') as handle:
            json.dump(list(entries), handle, sort_keys=True, indent=4)
        return exit_code
101
102
def setup_environment(args, destination):
    """ Sets up the environment for the build command.

    It copies the environment of the current process and adds the variables
    required by the interception. The exec calls will be logged by the
    'libear' preloaded library or by the 'wrapper' programs.

    :param args: the parsed command line arguments; this function reads
                 'override_compiler' and, when present, 'cc',
    :param destination: directory name, exported to the build as
                        INTERCEPT_BUILD_TARGET_DIR and passed to
                        'build_libear',
    :return: the environment (dict) to run the build command with. """

    c_compiler = args.cc if 'cc' in args else 'cc'

    # the library is built only when the preload mechanism is going to be used
    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment.update({
            'CC': COMPILER_WRAPPER_CC,
            'CXX': COMPILER_WRAPPER_CXX
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment
137
138
@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`.

    :return: the value returned by 'compiler_wrapper'. """

    return compiler_wrapper(intercept_compiler_wrapper_impl)
Laszlo Nagybc687582016-01-12 22:38:41 +0000144
Laszlo Nagybc687582016-01-12 22:38:41 +0000145
def intercept_compiler_wrapper_impl(_, execution):
    """ Implement intercept compiler wrapper functionality.

    It does generate execution report into target directory.
    The target directory name is from environment variables.

    Problems are only logged as warnings, they are never raised.

    :param _: ignored,
    :param execution: the execution to report, it is passed on to
                      'write_exec_trace',
    :return: None. """

    message_prefix = 'execution report might be incomplete: %s'

    target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if not target_dir:
        logging.warning(message_prefix, 'missing target directory')
        return
    # write current execution info to the pid file
    try:
        target_file_name = str(os.getpid()) + TRACE_FILE_EXTENSION
        target_file = os.path.join(target_dir, target_file_name)
        logging.debug('writing execution report to: %s', target_file)
        write_exec_trace(target_file, execution)
    except IOError:
        logging.warning(message_prefix, 'io problem')
166
167
def write_exec_trace(filename, entry):
    """ Write execution report file.

    This method shall be sync with the execution report writer in interception
    library. The entry is appended to the file as one group: its fields are
    joined by the record separator, every command argument is terminated by
    the unit separator and the group is closed by the group separator. The
    text is written UTF-8 encoded.

    :param filename: path to the output execution trace file,
    :param entry: the Execution object to append to that file; its 'pid',
                  'cwd' and 'cmd' attributes are read. """

    pid = str(entry.pid)
    # terminate (not just separate) the arguments, so an empty command
    # is written as an empty field
    command = ''.join(argument + US for argument in entry.cmd)
    # the pid is written into the second field too, which the parser
    # reads back as 'ppid'
    content = RS.join([pid, pid, 'wrapper', entry.cwd, command]) + GS
    with open(filename, 'ab') as handler:
        handler.write(content.encode('utf-8'))
Laszlo Nagybc687582016-01-12 22:38:41 +0000182
183
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    :param filename: path to the execution trace file,
    :return: generator of dicts with the 'pid', 'ppid', 'function',
             'directory' and 'command' keys; 'command' is a list of
             arguments, the other values are strings. """

    logging.debug('parse exec trace file: %s', filename)
    # the wrapper writes the reports UTF-8 encoded: read them the same way
    # instead of relying on the locale dependent default encoding
    with io.open(filename, 'r', encoding='utf-8') as handler:
        content = handler.read()
    # the whole file is in memory, no need to keep it open while yielding
    for group in filter(bool, content.split(GS)):
        records = group.split(RS)
        if len(records) < 5:
            # an incomplete group shall not abort the whole parsing
            logging.warning('skip malformed exec trace in: %s', filename)
            continue
        yield {
            'pid': records[0],
            'ppid': records[1],
            'function': records[2],
            'directory': records[3],
            'command': records[4].split(US)[:-1]
        }
203
204
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace: dict with the 'command' and 'directory' keys (as
                       yielded by 'parse_exec_trace'),
    :return: generator of dicts with the 'directory', 'command' and 'file'
             keys, one for every source file of the compilation; it yields
             nothing when 'split_command' gives a false value. """

    def abspath(cwd, name):
        """ Create normalized absolute path from input filename. """
        # 'os.path.join' returns 'name' itself when that is an absolute path
        return os.path.normpath(os.path.join(cwd, name))

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if compilation:
        # the compiler is the same for every source file of the command
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        for source in compilation.files:
            command = [compiler, '-c'] + compilation.flags + [source]
            logging.debug('formated as: %s', command)
            yield {
                'directory': exec_trace['directory'],
                'command': encode(command),
                'file': abspath(exec_trace['directory'], source)
            }
225
226
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    elif platform == 'darwin':
        command = ['csrutil', 'status']
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        try:
            return any(pattern.match(line) for line in run_command(command))
        except Exception:
            # the status can not be queried: consider preload as usable, but
            # let KeyboardInterrupt and SystemExit pass through
            return False
    else:
        return False
248
Laszlo Nagybc687582016-01-12 22:38:41 +0000249
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dict with the 'file', 'directory' and 'command' keys,
    :return: string made of the reversed file name, the reversed directory
             name and the command without its first word, joined by '<>'. """

    # For faster lookup in set filename is reversed
    filename = entry['file'][::-1]
    # For faster lookup in set directory is reversed
    directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for
    # 'clang' therefore both call would be logged. To avoid
    # this the hash does not contain the first word of the
    # command.
    command = ' '.join(decode(entry['command'])[1:])

    return '<>'.join([filename, directory, command])
263 return '<>'.join([filename, directory, command])