blob: 70f3233f5e8cb493da4a0ea2d6c52127426675f6 [file] [log] [blame]
Laszlo Nagybc687582016-01-12 22:38:41 +00001# -*- coding: utf-8 -*-
Chandler Carruth2946cd72019-01-19 08:50:56 +00002# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3# See https://llvm.org/LICENSE.txt for license information.
4# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
""" This module is responsible for capturing the compiler invocations of any
build process. The result of that should be a compilation database.
7
8This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES
9mechanisms provided by the dynamic linker. The related library is implemented
10in C language and can be found under 'libear' directory.
11
12The 'libear' library is capturing all child process creation and logging the
13relevant information about it into separate files in a specified directory.
14The parameter of this process is the output directory name, where the report
15files shall be placed. This parameter is passed as an environment variable.
16
17The module also implements compiler wrappers to intercept the compiler calls.
18
The module implements the build command execution and the post-processing of
the output files, which are condensed into a compilation database. """
21
22import sys
23import os
24import os.path
25import re
26import itertools
27import json
28import glob
Laszlo Nagybc687582016-01-12 22:38:41 +000029import logging
Laszlo Nagybc687582016-01-12 22:38:41 +000030from libear import build_libear, TemporaryDirectory
Laszlo Nagy2e9c9222017-03-04 01:08:05 +000031from libscanbuild import command_entry_point, compiler_wrapper, \
Laszlo Nagy5270bb92017-03-08 21:18:51 +000032 wrapper_environment, run_command, run_build
Laszlo Nagy0d9be632017-03-20 09:03:24 +000033from libscanbuild import duplicate_check
Laszlo Nagy8bd63e52016-04-19 12:03:03 +000034from libscanbuild.compilation import split_command
Laszlo Nagy5270bb92017-03-08 21:18:51 +000035from libscanbuild.arguments import parse_args_for_intercept_build
Laszlo Nagybc687582016-01-12 22:38:41 +000036from libscanbuild.shell import encode, decode
37
__all__ = ['capture', 'intercept_build', 'intercept_compiler_wrapper']

# ASCII separator characters (0x1d, 0x1e, 0x1f) structuring the execution
# trace files: GS closes each execution report, RS separates the fields of
# a report and US terminates every argument inside the command field.
GS = chr(0x1d)
RS = chr(0x1e)
US = chr(0x1f)

# values assigned to CC and CXX when the compiler wrappers are in use
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
TRACE_FILE_EXTENSION = '.cmd' # same as in ear.c
# platforms for which library preload is always reported as disabled
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})
Laszlo Nagybc687582016-01-12 22:38:41 +000048
49
@command_entry_point
def intercept_build():
    """ Entry point for 'intercept-build' command.

    Parses the command line and hands the parsed arguments over to
    `capture`; whatever `capture` returns is the result of this call. """

    return capture(parse_args_for_intercept_build())
Laszlo Nagybc687582016-01-12 22:38:41 +000056
57
def capture(args):
    """ The entry point of build command interception.

    Runs the build command with the interception environment, collects the
    execution reports written into a temporary directory and writes the
    resulting compilation database to `args.cdb`.

    :param args: the parsed command line arguments; this function reads
                 `build`, `cdb` and (when present) `append`,
    :return: the value returned by `run_build` for the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-') as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; the glob results already carry
        # the 'tmp_dir' prefix, so they are used as they are
        trace_files = sorted(glob.iglob(
            os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)))
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename) for filename in trace_files)
        # do post processing; the entries are materialized _before_ the
        # output file is opened, because opening it for writing truncates
        # it and a failure while processing the traces would otherwise
        # destroy a database left by a previous run
        entries = list(post_processing(exec_traces))
        # dump the compilation database
        with open(args.cdb, 'w') as handle:
            json.dump(entries, handle, sort_keys=True, indent=4)
        return exit_code
100
101
def setup_environment(args, destination):
    """ Build the environment the build command shall run with.

    The result is a copy of the current process environment extended with
    the variables needed either by the preloaded 'libear' library or by
    the compiler wrappers.

    :param args: the parsed command line arguments,
    :param destination: directory where the execution reports are written,
    :return: the environment as a dictionary. """

    if 'cc' in args:
        c_compiler = args.cc
    else:
        c_compiler = 'cc'
    # not used below, the lookup is kept as it was
    if 'cxx' in args:
        cxx_compiler = args.cxx
    else:
        cxx_compiler = 'c++'

    # the library is only built when preloading is going to be used
    wrappers_only = args.override_compiler or is_preload_disabled(
        sys.platform)
    if wrappers_only:
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment['INTERCEPT_BUILD_TARGET_DIR'] = destination

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment['CC'] = COMPILER_WRAPPER_CC
        environment['CXX'] = COMPILER_WRAPPER_CXX
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment['DYLD_INSERT_LIBRARIES'] = libear_path
        environment['DYLD_FORCE_FLAT_NAMESPACE'] = '1'
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment['LD_PRELOAD'] = libear_path

    return environment
136
137
@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`.

    Delegates to `compiler_wrapper`, handing over the report writing
    callback, and returns its result. """

    result = compiler_wrapper(intercept_compiler_wrapper_impl)
    return result
Laszlo Nagybc687582016-01-12 22:38:41 +0000143
Laszlo Nagybc687582016-01-12 22:38:41 +0000144
def intercept_compiler_wrapper_impl(_, execution):
    """ Write the execution report of a wrapped compiler call.

    The report goes into the directory named by the environment variable
    INTERCEPT_BUILD_TARGET_DIR, into a file named after the current
    process id. Problems are logged as warnings and are not raised.

    :param _: ignored,
    :param execution: the execution to report, passed on to
                      `write_exec_trace`. """

    warning_template = 'execution report might be incomplete: %s'

    report_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if report_dir:
        # write current execution info to the pid file
        try:
            report_name = str(os.getpid()) + TRACE_FILE_EXTENSION
            report_path = os.path.join(report_dir, report_name)
            logging.debug('writing execution report to: %s', report_path)
            write_exec_trace(report_path, execution)
        except IOError:
            logging.warning(warning_template, 'io problem')
    else:
        logging.warning(warning_template, 'missing target directory')
165
166
def write_exec_trace(filename, entry):
    """ Append one execution report to the trace file.

    This method shall be in sync with the execution report writer in the
    interception library. A report is a sequence of fields joined by RS
    and closed by GS; the fields are the pid, the pid again (in the
    position of the parent pid), the literal 'wrapper', the working
    directory and the command, where every argument of the command is
    followed by US. The report is written UTF-8 encoded.

    :param filename: path to the output execution trace file,
    :param entry: the execution to append to that file; its `pid`, `cmd`
                  and `cwd` attributes are read. """

    with open(filename, 'ab') as trace_file:
        process_id = str(entry.pid)
        arguments = US.join(entry.cmd) + US
        fields = [process_id, process_id, 'wrapper', entry.cwd, arguments]
        report = RS.join(fields) + GS
        trace_file.write(report.encode('utf-8'))
Laszlo Nagybc687582016-01-12 22:38:41 +0000181
182
def parse_exec_trace(filename):
    """ Read back the execution reports stored in a trace file.

    The file is the one written by the interception library or by the
    wrapper command, and it _might_ hold more than one report. Reports
    are delimited by GS, the fields of a report by RS and the arguments
    of the command by US.

    :param filename: path to the execution trace file,
    :return: generator of dictionaries with the keys 'pid', 'ppid',
             'function', 'directory' and 'command'. """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as trace_file:
        content = trace_file.read()
        for report in content.split(GS):
            # splitting leaves empty pieces behind (eg.: after the last GS)
            if not report:
                continue
            fields = report.split(RS)
            yield dict(
                pid=fields[0],
                ppid=fields[1],
                function=fields[2],
                directory=fields[3],
                # every argument is closed by US, so the piece after the
                # last one is dropped
                command=fields[4].split(US)[:-1])
202
203
def format_entry(exec_trace):
    """ Generate the compilation database entries of an execution.

    Nothing is generated when `split_command` returns a false value for
    the command. Otherwise one entry is generated for every file of the
    compilation.

    :param exec_trace: dictionary with the 'command' and 'directory' keys,
    :return: generator of dictionaries with the keys 'directory',
             'command' and 'file'. """

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        driver = 'c++' if compilation.compiler == 'c++' else 'cc'
        command = [driver, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)
        yield {
            'directory': exec_trace['directory'],
            'command': encode(command),
            # normalized absolute path; relative names are resolved
            # against the directory of the execution
            'file': os.path.normpath(
                source if os.path.isabs(source)
                else os.path.join(exec_trace['directory'], source))
        }
224
225
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    Any failure to run or to read 'csrutil status' is treated as "preload
    is not disabled"; the failure is logged on debug level.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    if platform != 'darwin':
        return False

    command = ['csrutil', 'status']
    pattern = re.compile(r'System Integrity Protection status:\s+enabled')
    try:
        return any(pattern.match(line) for line in run_command(command))
    except Exception:
        # best effort only: a failing query shall not stop the build, but
        # KeyboardInterrupt and SystemExit must not be swallowed here
        logging.debug('could not query the SIP status', exc_info=True)
        return False
247
Laszlo Nagybc687582016-01-12 22:38:41 +0000248
def entry_hash(entry):
    """ Compute the key which identifies a compilation database entry.

    :param entry: dictionary with the 'file', 'directory' and 'command'
                  keys,
    :return: string made of the reversed file name, the reversed directory
             and the command without its first word, joined by '<>'. """

    # file name and directory are stored reversed (meant to make the
    # lookup in a set faster)
    reversed_file = entry['file'][::-1]
    reversed_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To avoid this, the first
    # word of the command is left out of the hash.
    arguments = decode(entry['command'])[1:]

    parts = [reversed_file, reversed_directory, ' '.join(arguments)]
    return '<>'.join(parts)
262 return '<>'.join([filename, directory, command])