Laszlo Nagy | bc68758 | 2016-01-12 22:38:41 +0000 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | # The LLVM Compiler Infrastructure |
| 3 | # |
| 4 | # This file is distributed under the University of Illinois Open Source |
| 5 | # License. See LICENSE.TXT for details. |
| 6 | """ This module is responsible for capturing the compiler invocations of any |
| 7 | build process. The result of that should be a compilation database. |
| 8 | |
| 9 | This implementation is using the LD_PRELOAD or DYLD_INSERT_LIBRARIES |
| 10 | mechanisms provided by the dynamic linker. The related library is implemented |
| 11 | in C language and can be found under 'libear' directory. |
| 12 | |
| 13 | The 'libear' library is capturing all child process creation and logging the |
| 14 | relevant information about it into separate files in a specified directory. |
| 15 | The parameter of this process is the output directory name, where the report |
| 16 | files shall be placed. This parameter is passed as an environment variable. |
| 17 | |
| 18 | The module also implements compiler wrappers to intercept the compiler calls. |
| 19 | |
| 20 | The module implements the build command execution and the post-processing of |
| 21 | the output files, which are condensed into a compilation database. """ |
| 22 | |
| 23 | import sys |
| 24 | import os |
| 25 | import os.path |
| 26 | import re |
| 27 | import itertools |
| 28 | import json |
| 29 | import glob |
| 30 | import argparse |
| 31 | import logging |
Laszlo Nagy | bc68758 | 2016-01-12 22:38:41 +0000 | [diff] [blame] | 32 | from libear import build_libear, TemporaryDirectory |
Laszlo Nagy | 2e9c922 | 2017-03-04 01:08:05 +0000 | [diff] [blame^] | 33 | from libscanbuild import command_entry_point, compiler_wrapper, \ |
| 34 | wrapper_environment, run_command, run_build, reconfigure_logging |
| 35 | from libscanbuild import duplicate_check, tempdir |
Laszlo Nagy | 8bd63e5 | 2016-04-19 12:03:03 +0000 | [diff] [blame] | 36 | from libscanbuild.compilation import split_command |
Laszlo Nagy | bc68758 | 2016-01-12 22:38:41 +0000 | [diff] [blame] | 37 | from libscanbuild.shell import encode, decode |
| 38 | |
# Names exported by this module.
__all__ = ['capture', 'intercept_build_main', 'intercept_compiler_wrapper']

# ASCII control characters used as delimiters in the execution trace files
# (see write_exec_trace and parse_exec_trace).
GS = chr(0x1d)  # group separator: terminates one execution record
RS = chr(0x1e)  # record separator: delimits the fields of a record
US = chr(0x1f)  # unit separator: terminates each command argument

# File names of the compiler wrappers; joined with the bin directory to
# form the CC and CXX values in setup_environment.
COMPILER_WRAPPER_CC = 'intercept-cc'
COMPILER_WRAPPER_CXX = 'intercept-c++'
TRACE_FILE_EXTENSION = '.cmd'  # same as in ear.c
# Values of sys.platform for which is_preload_disabled always returns True.
WRAPPER_ONLY_PLATFORMS = frozenset({'win32', 'cygwin'})
Laszlo Nagy | bc68758 | 2016-01-12 22:38:41 +0000 | [diff] [blame] | 49 | |
| 50 | |
@command_entry_point
def intercept_build_main(bin_dir):
    """ Entry point for 'intercept-build' command.

    Parses the command line, configures logging from the verbosity count
    and hands over to `capture`. When no build command was given the help
    text is printed instead.

    :param bin_dir: forwarded unchanged to `capture`,
    :return: 0 when only the help was printed, otherwise the value
             returned by `capture`. """

    parser = create_parser()
    args = parser.parse_args()

    reconfigure_logging(args.verbose)
    logging.debug('Raw arguments %s', sys.argv)

    if args.build:
        return capture(args, bin_dir)

    # nothing to run: show the usage and report success
    parser.print_help()
    return 0
| 66 | |
| 67 | |
def capture(args, bin_dir):
    """ The entry point of build command interception.

    Runs the build command with the interception environment, reads the
    execution traces written into a temporary directory, and writes the
    resulting compilation database to `args.cdb`.

    :param args: parsed command line arguments; `build` and `cdb` are read
                 here, `append` is honoured when present,
    :param bin_dir: forwarded to `setup_environment`,
    :return: the value returned by `run_build` for the build command. """

    def post_processing(commands):
        """ To make a compilation database, it needs to filter out commands
        which are not compiler calls. Needs to find the source file name
        from the arguments. And do shell escaping on the command.

        To support incremental builds, it is desired to read elements from
        an existing compilation database from a previous run. These elements
        shall be merged with the new elements. """

        # create entries from the current run
        current = itertools.chain.from_iterable(
            # creates a sequence of entry generators from an exec,
            format_entry(command) for command in commands)
        # read entries from previous run
        if 'append' in args and args.append and os.path.isfile(args.cdb):
            with open(args.cdb) as handle:
                previous = iter(json.load(handle))
        else:
            previous = iter([])
        # filter out duplicate entries from both
        duplicate = duplicate_check(entry_hash)
        return (entry
                for entry in itertools.chain(previous, current)
                if os.path.exists(entry['file']) and not duplicate(entry))

    with TemporaryDirectory(prefix='intercept-', dir=tempdir()) as tmp_dir:
        # run the build command
        environment = setup_environment(args, tmp_dir, bin_dir)
        exit_code = run_build(args.build, env=environment)
        # read the intercepted exec calls; glob already yields paths which
        # include the directory, so they must not be joined to it again
        trace_pattern = os.path.join(tmp_dir, '*' + TRACE_FILE_EXTENSION)
        exec_traces = itertools.chain.from_iterable(
            parse_exec_trace(filename)
            for filename in sorted(glob.iglob(trace_pattern)))
        # do post processing; the lazy pipeline is evaluated completely
        # before the output file is opened (and truncated), so a failure
        # here leaves an already existing database untouched
        entries = list(post_processing(exec_traces))
        # dump the compilation database
        with open(args.cdb, 'w+') as handle:
            json.dump(entries, handle, sort_keys=True, indent=4)
        return exit_code
| 110 | |
| 111 | |
def setup_environment(args, destination, bin_dir):
    """ Sets up the environment for the build command.

    It only prepares the environment variables; the build command itself is
    executed by the caller. The exec calls will be logged by the 'libear'
    preloaded library or by the 'wrapper' programs.

    :param args: parsed command line arguments; `override_compiler` is
                 read, `cc` is used when present,
    :param destination: directory name exported to the build as
                        INTERCEPT_BUILD_TARGET_DIR and passed to
                        `build_libear`,
    :param bin_dir: directory joined with the compiler wrapper names to
                    form the CC and CXX values,
    :return: a copy of `os.environ` extended with the intercept settings. """

    c_compiler = args.cc if 'cc' in args else 'cc'

    # the preload library is only built when it is going to be used
    if args.override_compiler or is_preload_disabled(sys.platform):
        libear_path = None
    else:
        libear_path = build_libear(c_compiler, destination)

    environment = dict(os.environ)
    environment.update({'INTERCEPT_BUILD_TARGET_DIR': destination})

    if not libear_path:
        logging.debug('intercept gonna use compiler wrappers')
        environment.update(wrapper_environment(args))
        environment.update({
            'CC': os.path.join(bin_dir, COMPILER_WRAPPER_CC),
            'CXX': os.path.join(bin_dir, COMPILER_WRAPPER_CXX)
        })
    elif sys.platform == 'darwin':
        logging.debug('intercept gonna preload libear on OSX')
        environment.update({
            'DYLD_INSERT_LIBRARIES': libear_path,
            'DYLD_FORCE_FLAT_NAMESPACE': '1'
        })
    else:
        logging.debug('intercept gonna preload libear on UNIX')
        environment.update({'LD_PRELOAD': libear_path})

    return environment
| 146 | |
| 147 | |
@command_entry_point
def intercept_compiler_wrapper():
    """ Entry point for `intercept-cc` and `intercept-c++`.

    Passes the intercept implementation to `compiler_wrapper` as the
    callback and returns whatever that call returns. """

    implementation = intercept_compiler_wrapper_impl
    return compiler_wrapper(implementation)
Laszlo Nagy | bc68758 | 2016-01-12 22:38:41 +0000 | [diff] [blame] | 153 | |
Laszlo Nagy | bc68758 | 2016-01-12 22:38:41 +0000 | [diff] [blame] | 154 | |
def intercept_compiler_wrapper_impl(_, execution):
    """ Implement intercept compiler wrapper functionality.

    Writes an execution report about this process into the directory named
    by the INTERCEPT_BUILD_TARGET_DIR environment variable. Problems are
    only logged as warnings.

    :param _: ignored,
    :param execution: the object handed over to `write_exec_trace`. """

    message_prefix = 'execution report might be incomplete: %s'

    target_dir = os.getenv('INTERCEPT_BUILD_TARGET_DIR')
    if target_dir:
        # write current execution info to the pid file
        try:
            report_name = '{0}{1}'.format(os.getpid(), TRACE_FILE_EXTENSION)
            report_path = os.path.join(target_dir, report_name)
            logging.debug('writing execution report to: %s', report_path)
            write_exec_trace(report_path, execution)
        except IOError:
            logging.warning(message_prefix, 'io problem')
    else:
        logging.warning(message_prefix, 'missing target directory')
| 175 | |
| 176 | |
def write_exec_trace(filename, entry):
    """ Write execution report file.

    The record is appended to the file as UTF-8 encoded text: the fields
    (pid, pid again in the parent pid position, the literal 'wrapper', the
    working directory and the command) are joined by RS, every command
    argument is terminated by US, and the record is terminated by GS. This
    is the layout `parse_exec_trace` reads back.

    :param filename: path to the output execution trace file,
    :param entry: object whose `pid`, `cwd` and `cmd` attributes are
                  written. """

    with open(filename, 'ab') as handler:
        process_id = str(entry.pid)
        fields = [
            process_id,
            process_id,
            'wrapper',
            entry.cwd,
            US.join(entry.cmd) + US,
        ]
        record = RS.join(fields) + GS
        handler.write(record.encode('utf-8'))
Laszlo Nagy | bc68758 | 2016-01-12 22:38:41 +0000 | [diff] [blame] | 191 | |
| 192 | |
def parse_exec_trace(filename):
    """ Parse the file generated by the 'libear' preloaded library.

    Given filename points to a file which contains the basic report
    generated by the interception library or wrapper command. A single
    report file _might_ contain multiple process creation info.

    Records are separated by GS and their fields by RS. A record with fewer
    than five fields is skipped with a warning instead of aborting the
    whole run.

    :param filename: path of the execution trace file to read,
    :return: generator of dictionaries with the 'pid', 'ppid', 'function',
             'directory' and 'command' keys; 'command' is a list of
             strings, the other values are strings. """

    logging.debug('parse exec trace file: %s', filename)
    with open(filename, 'r') as handler:
        content = handler.read()
        for group in filter(bool, content.split(GS)):
            records = group.split(RS)
            if len(records) < 5:
                logging.warning('malformed execution report skipped in: %s',
                                filename)
                continue
            yield {
                'pid': records[0],
                'ppid': records[1],
                'function': records[2],
                'directory': records[3],
                # every argument is terminated by US, so the last element
                # of the split is always an empty leftover
                'command': records[4].split(US)[:-1]
            }
| 212 | |
| 213 | |
def format_entry(exec_trace):
    """ Generate the desired fields for compilation database entries.

    :param exec_trace: dictionary with the 'command' and 'directory' keys,
    :return: generator of dictionaries ('directory', 'command', 'file'),
             one per source file reported by `split_command`; nothing is
             generated when `split_command` returns a false value. """

    logging.debug('format this command: %s', exec_trace['command'])
    compilation = split_command(exec_trace['command'])
    if not compilation:
        return

    for source in compilation.files:
        compiler = 'c++' if compilation.compiler == 'c++' else 'cc'
        command = [compiler, '-c'] + compilation.flags + [source]
        logging.debug('formated as: %s', command)
        working_dir = exec_trace['directory']
        escaped = encode(command)
        # normalized absolute path of the source file; relative names are
        # resolved against the directory of the execution
        if os.path.isabs(source):
            location = source
        else:
            location = os.path.join(exec_trace['directory'], source)
        yield {
            'directory': working_dir,
            'command': escaped,
            'file': os.path.normpath(location)
        }
| 234 | |
| 235 | |
def is_preload_disabled(platform):
    """ Library-based interposition will fail silently if SIP is enabled,
    so this should be detected. You can detect whether SIP is enabled on
    Darwin by checking whether (1) there is a binary called 'csrutil' in
    the path and, if so, (2) whether the output of executing 'csrutil status'
    contains 'System Integrity Protection status: enabled'.

    :param platform: name of the platform (returned by sys.platform),
    :return: True if library preload will fail by the dynamic linker. """

    if platform in WRAPPER_ONLY_PLATFORMS:
        return True
    elif platform == 'darwin':
        command = ['csrutil', 'status']
        pattern = re.compile(r'System Integrity Protection status:\s+enabled')
        try:
            return any(pattern.match(line) for line in run_command(command))
        except Exception:
            # Best effort: when the status can not be queried the preload
            # is assumed to work. Only ordinary errors are swallowed, so
            # KeyboardInterrupt and SystemExit still propagate.
            logging.debug('csrutil status query failed', exc_info=True)
            return False
    else:
        return False
| 257 | |
Laszlo Nagy | bc68758 | 2016-01-12 22:38:41 +0000 | [diff] [blame] | 258 | |
def entry_hash(entry):
    """ Implement unique hash method for compilation database entries.

    :param entry: dictionary with the 'file', 'directory' and 'command'
                  keys,
    :return: string built from the three fields, joined by '<>'. """

    # File name and directory are stored reversed for faster lookup in set.
    reversed_file = entry['file'][::-1]
    reversed_directory = entry['directory'][::-1]
    # On OS X the 'cc' and 'c++' compilers are wrappers for 'clang',
    # therefore both calls would be logged. To avoid this the hash does
    # not contain the first word of the command.
    arguments = decode(entry['command'])[1:]

    parts = [reversed_file, reversed_directory, ' '.join(arguments)]
    return '<>'.join(parts)
| 273 | |
| 274 | |
def create_parser():
    """ Command line argument parser factory method.

    :return: an `argparse.ArgumentParser` which accepts the verbosity,
             output file, append and compiler options, followed by the
             build command collected into `build`. """

    formatter = argparse.ArgumentDefaultsHelpFormatter
    parser = argparse.ArgumentParser(formatter_class=formatter)

    # general options
    parser.add_argument(
        '--verbose', '-v', action='count', default=0,
        help="""Enable verbose output from '%(prog)s'. A second and third
                flag increases verbosity.""")
    parser.add_argument(
        '--cdb', metavar='<file>', default="compile_commands.json",
        help="""The JSON compilation database.""")
    exclusive = parser.add_mutually_exclusive_group()
    exclusive.add_argument(
        '--append', action='store_true',
        help="""Append new entries to existing compilation database.""")

    # options which control how the interception is done
    tuning = parser.add_argument_group('advanced options')
    tuning.add_argument(
        '--override-compiler', action='store_true',
        help="""Always resort to the compiler wrapper even when better
                intercept methods are available.""")
    tuning.add_argument(
        '--use-cc', metavar='<path>', dest='cc', default='cc',
        help="""When '%(prog)s' analyzes a project by interposing a compiler
                wrapper, which executes a real compiler for compilation and
                do other tasks (record the compiler invocation). Because of
                this interposing, '%(prog)s' does not know what compiler your
                project normally uses. Instead, it simply overrides the CC
                environment variable, and guesses your default compiler.

                If you need '%(prog)s' to use a specific compiler for
                *compilation* then you can use this option to specify a path
                to that compiler.""")
    tuning.add_argument(
        '--use-c++', metavar='<path>', dest='cxx', default='c++',
        help="""This is the same as "--use-cc" but for C++ code.""")

    # everything after the options is the build command
    parser.add_argument(
        dest='build', nargs=argparse.REMAINDER,
        help="""Command to run.""")

    return parser