blob: ef906fa60b9be26fca9e49cec1d68a466eac8f22 [file] [log] [blame]
Laszlo Nagy8bd63e52016-04-19 12:03:03 +00001# -*- coding: utf-8 -*-
2# The LLVM Compiler Infrastructure
3#
4# This file is distributed under the University of Illinois Open Source
5# License. See LICENSE.TXT for details.
""" This module is responsible for parsing a compiler invocation. """
7
8import re
9import os
10import collections
11
12__all__ = ['split_command', 'classify_source', 'compiler_language']
13
# Ignored compiler options map for compilation database creation.
# The map is used in the `split_command` function (which both ignores and
# classifies parameters). Please note that these are not the only parameters
# which might be ignored.
#
# Keys are option names; values are the number of following arguments to skip.
IGNORED_FLAGS = {
    # The "compile only" switch is dropped, because whoever creates the
    # compilation database is going to set it explicitly.
    '-c': 0,
    # Preprocessor dependency output switches. Keeping them would produce
    # duplicate entries in the output (entries which differ in these flags
    # only). Users reported longer execution times caused by such
    # duplicates.
    '-MD': 0,
    '-MMD': 0,
    '-MG': 0,
    '-MP': 0,
    '-MF': 1,
    '-MT': 1,
    '-MQ': 1,
    # Linker switches. The compilation database holds compilation commands
    # only, so the compiler would ignore these anyway; removing them just
    # makes the output more readable.
    '-static': 0,
    '-shared': 0,
    '-s': 0,
    '-rdynamic': 0,
    '-l': 1,
    '-L': 1,
    '-u': 1,
    '-z': 1,
    '-T': 1,
    '-Xlinker': 1,
}
50
51# Known C/C++ compiler executable name patterns
COMPILER_PATTERNS = frozenset(
    re.compile(expression) for expression in (
        # plain `cc` / `c++`, optionally behind an `intercept-` or
        # `analyze-` wrapper prefix
        r'^(intercept-|analyze-|)c(c|\+\+)$',
        # names ending in `gcc`, `g++`, `mcc` or `m++`, with optional dash
        # separated prefixes and an optional version suffix
        r'^([^-]*-)*[mg](cc|\+\+)(-\d+(\.\d+){0,2})?$',
        # `clang` / `clang++`, with optional dash separated prefixes and an
        # optional version suffix
        r'^([^-]*-)*clang(\+\+)?(-\d+(\.\d+){0,2})?$',
        # `llvm-gcc` / `llvm-g++`
        r'^llvm-g(cc|\+\+)$',
    ))
58
59
# The value type returned by `split_command`.
Compilation = collections.namedtuple('Compilation',
                                     ['compiler', 'flags', 'files'])


def split_command(command):
    """ Returns a value when the command is a compilation, None otherwise.

    The command is a list of strings, where the first element is the
    executable and the rest are its arguments.

    None is returned when the executable is not recognized as a C/C++
    compiler, when the arguments show that no compilation pass is involved
    (eg.: preprocessing only), or when no source file was found.

    The value on success is a named tuple with the following attributes:

        compiler:   string value of 'c' or 'c++'
        flags:      list of compile options
        files:      list of source files """

    language = compiler_language(command)
    # quit right now, if the program was not a C/C++ compiler
    if not language:
        return None

    flags = []
    files = []
    # iterate on the compile options
    args = iter(command[1:])
    for arg in args:
        # quit when compilation pass is not involved
        if arg in {'-E', '-S', '-cc1', '-M', '-MM', '-###'}:
            return None
        # ignore some flags, together with the arguments they take
        elif arg in IGNORED_FLAGS:
            for _ in range(IGNORED_FLAGS[arg]):
                # a truncated command line might not have the argument; the
                # default stops `next` from raising StopIteration.
                next(args, None)
        # ignore library, library path and linker pass-through options
        # which have the value glued to the flag
        elif re.match(r'^-(l|L|Wl,).+', arg):
            pass
        # some parameters could look like filename, take as compile option
        elif arg in {'-D', '-I'}:
            flags.append(arg)
            # keep the bare flag when the command line ends before the value
            value = next(args, None)
            if value is not None:
                flags.append(value)
        # parameter which looks source file is taken...
        elif re.match(r'^[^-].+', arg) and classify_source(arg):
            files.append(arg)
        # and consider everything else as compile option.
        else:
            flags.append(arg)
    # do extra check on number of source files
    if not files:
        return None
    return Compilation(compiler=language, flags=flags, files=files)
102
103
def classify_source(filename, c_compiler=True):
    """ Look up the source language which belongs to the file extension.

    The extension is taken from the base name of `filename` and the lookup
    is case sensitive. `c_compiler` decides how the ambiguous `.c` and `.i`
    extensions are reported: as C (when true) or as C++ (when false).

    Returns the language name, or None when the extension is not known. """

    plain_source = 'c' if c_compiler else 'c++'
    preprocessed_source = 'c-cpp-output' if c_compiler else 'c++-cpp-output'

    languages = {
        # depends on the compiler which was called
        '.c': plain_source,
        '.i': preprocessed_source,
        # preprocessed C++
        '.ii': 'c++-cpp-output',
        # Objective-C and Objective-C++
        '.m': 'objective-c',
        '.mi': 'objective-c-cpp-output',
        '.mm': 'objective-c++',
        '.mii': 'objective-c++-cpp-output',
        # C++
        '.C': 'c++',
        '.cc': 'c++',
        '.CC': 'c++',
        '.cp': 'c++',
        '.cpp': 'c++',
        '.cxx': 'c++',
        '.c++': 'c++',
        '.C++': 'c++',
        '.txx': 'c++',
    }

    base_name = os.path.basename(filename)
    suffix = os.path.splitext(base_name)[1]
    return languages.get(suffix)
128
129
def compiler_language(command):
    """ Decide whether the command is a compiler call, and for which language.

    Only the base name of the first element of `command` is inspected.

    Returns 'c++' when that name matches one of `COMPILER_PATTERNS` and
    contains '++' (either at the end or followed by a dash separated
    suffix), 'c' when it matches one of the patterns otherwise. Returns
    None when `command` is empty or the name matches none of the patterns. """

    if not command:
        return None

    program = os.path.basename(command[0])
    for known_compiler in COMPILER_PATTERNS:
        if known_compiler.match(program):
            break
    else:
        # none of the known compiler names matched
        return None

    looks_like_cxx = re.compile(r'^(.+)(\+\+)(-.+|)$')
    if looks_like_cxx.match(program):
        return 'c++'
    return 'c'
141 return None