Laszlo Nagy | 8bd63e5 | 2016-04-19 12:03:03 +0000 | [diff] [blame] | 1 | # -*- coding: utf-8 -*- |
| 2 | # The LLVM Compiler Infrastructure |
| 3 | # |
| 4 | # This file is distributed under the University of Illinois Open Source |
| 5 | # License. See LICENSE.TXT for details. |
| 6 | """ This module is responsible for parsing a compiler invocation. """ |
| 7 | |
| 8 | import re |
| 9 | import os |
| 10 | import collections |
| 11 | |
| 12 | __all__ = ['split_command', 'classify_source', 'compiler_language'] |
| 13 | |
# Compiler options which are dropped during compilation database creation.
# `split_command` looks every argument up in this table: the key is the
# option as it appears on the command line, the value is the number of
# arguments following it which are dropped together with it. (This table
# is not the only reason why `split_command` drops an argument.)
#
# Options dropped alone:
#   -c                  the creator of the compilation database sets it
#                       explicitly.
#   -MD -MMD -MG -MP    dependency output options; keeping them would cause
#                       duplicate entries in the output (entries differing
#                       only in these flags), which users reported as a
#                       cause of longer execution time.
#   -static -shared     linker options; the compilation database contains
#   -s -rdynamic        compilation commands only, so the compiler would
#                       ignore them anyway and the output is more readable
#                       without them.
IGNORED_FLAGS = dict.fromkeys(
    ['-c', '-MD', '-MMD', '-MG', '-MP',
     '-static', '-shared', '-s', '-rdynamic'],
    0)
# Options dropped together with the one argument which follows them: the
# remaining dependency output options and the remaining linker options.
IGNORED_FLAGS.update(dict.fromkeys(
    ['-MF', '-MT', '-MQ',
     '-l', '-L', '-u', '-z', '-T', '-Xlinker'],
    1))
| 50 | |
# Regular expressions matching the executable names (without directory) of
# the known C/C++ compilers. A name is accepted when any of them matches.
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # cc and c++, optionally with the intercept- or analyze- prefix
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # names ending in gcc, g++, mcc or m++; any number of dash
        # terminated prefixes and a -N, -N.N or -N.N.N suffix are allowed
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # clang and clang++ with the same kind of prefixes and suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # llvm-gcc and llvm-g++
        r'^llvm-g(cc|\+\+)$',
    ))
| 58 | |
| 59 | |
# The value returned by `split_command` for a compilation command.
_Compilation = collections.namedtuple('Compilation',
                                      ['compiler', 'flags', 'files'])

# Arguments which mean that no compilation pass is involved.
_NON_COMPILATION_FLAGS = frozenset(['-E', '-S', '-cc1', '-M', '-MM', '-###'])

# Linker related options written together with their value (like `-lm`,
# `-L/usr/lib` or `-Wl,--as-needed`).
_JOINED_LINKER_FLAG = re.compile(r'^-(l|L|Wl,).+')

# An argument of at least two characters which does not start with a dash.
_FILE_CANDIDATE = re.compile(r'^[^-].+')


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    command: the command as a list of strings, the executable comes first.

    The value on success is a named tuple with the following attributes:

        files:    list of source files
        flags:    list of compile options
        compiler: string value of 'c' or 'c++'

    None is returned when the executable is not a known C/C++ compiler,
    when one of the arguments tells that no compilation pass is involved,
    or when no source file was found among the arguments.

    An option which expects a value but happens to be the last argument
    does not make this function fail: an ignored option is just dropped,
    while `-D` and `-I` are kept in the flags without a value. """

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None

    flags = []
    files = []
    # an explicit iterator, because some options consume the arguments
    # which are following them
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in _NON_COMPILATION_FLAGS:
            return None
        # ignore some flags, together with their values
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # the default stops StopIteration from escaping when the
                # command ends before the value of the option
                next(args, None)
        elif _JOINED_LINKER_FLAG.match(arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif _FILE_CANDIDATE.match(arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)

    # do extra check on number of source files
    if not files:
        return None
    return _Compilation(compiler=language, flags=flags, files=files)
| 102 | |
| 103 | |
def classify_source(filename, c_compiler=True):
    """ Return the language from file name extension.

    filename:   name or path of the file, only the extension of the last
                path component is looked at (case sensitive).
    c_compiler: selects the language of the `.c` and `.i` files, which
                are taken as C when it is true and as C++ otherwise.

    Returns the name of the language as string, or None when the
    extension is not a known one. """

    # the two extensions whose meaning depends on the compiler
    if c_compiler:
        plain, preprocessed = 'c', 'c-cpp-output'
    else:
        plain, preprocessed = 'c++', 'c++-cpp-output'

    languages = {
        '.c': plain,
        '.i': preprocessed,
        '.ii': 'c++-cpp-output',
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
    }
    # all the remaining known extensions are C++ sources
    for suffix in ('.C', '.cc', '.CC', '.cp', '.cpp', '.cxx', '.c++',
                   '.C++', '.txx'):
        languages[suffix] = 'c++'

    last_component = os.path.basename(filename)
    extension = os.path.splitext(last_component)[1]
    return languages.get(extension)
| 128 | |
| 129 | |
def compiler_language(command):
    """ A predicate to decide the command is a compiler call or not.

    command: the command as a list of strings, the executable comes first.

    Only the file name part of the executable is checked against
    COMPILER_PATTERNS. Returns 'c++' when the name matches and contains
    `++` at its end or right before a dash, 'c' when the name matches
    otherwise. Returns None when the name does not match or the command
    is empty. """

    if not command:
        return None

    program = os.path.basename(command[0])
    for known in COMPILER_PATTERNS:
        if known.match(program):
            break
    else:
        # none of the known compiler names matched
        return None

    if re.match(r'^(.+)(\+\+)(-.+|)$', program):
        return 'c++'
    return 'c'