blob: 77f5f9696a6d848f92558b7210e2b4593a96f4fb [file] [log] [blame]
Larry Hastings7726ac92014-01-31 22:03:12 -08001import re
2import sys
3
def negate(condition):
    """
    Return the logical opposite of a CPP conditional expression.

    A leading '!' is removed when present; otherwise one is prepended.
    The text is not parsed any further, so the caller is expected to pass
    something that is safe to negate this way (a single term or a
    parenthesized expression).
    """
    already_negated = condition.startswith('!')
    return condition[1:] if already_negated else "!" + condition
11
12class Monitor:
13 """
14 A simple C preprocessor that scans C source and computes, line by line,
15 what the current C preprocessor #if state is.
16
17 Doesn't handle everything--for example, if you have /* inside a C string,
18 without a matching */ (also inside a C string), or with a */ inside a C
19 string but on another line and with preprocessor macros in between...
20 the parser will get lost.
21
22 Anyway this implementation seems to work well enough for the CPython sources.
23 """
24
25 is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match
26
27 def __init__(self, filename=None, *, verbose=False):
28 self.stack = []
29 self.in_comment = False
30 self.continuation = None
31 self.line_number = 0
32 self.filename = filename
33 self.verbose = verbose
34
35 def __repr__(self):
36 return ''.join((
37 '<Monitor ',
38 str(id(self)),
39 " line=", str(self.line_number),
40 " condition=", repr(self.condition()),
41 ">"))
42
43 def status(self):
44 return str(self.line_number).rjust(4) + ": " + self.condition()
45
46 def condition(self):
47 """
48 Returns the current preprocessor state, as a single #if condition.
49 """
50 return " && ".join(condition for token, condition in self.stack)
51
52 def fail(self, *a):
53 if self.filename:
54 filename = " " + self.filename
55 else:
56 filename = ''
57 print("Error at" + filename, "line", self.line_number, ":")
58 print(" ", ' '.join(str(x) for x in a))
59 sys.exit(-1)
60
61 def close(self):
62 if self.stack:
63 self.fail("Ended file while still in a preprocessor conditional block!")
64
65 def write(self, s):
66 for line in s.split("\n"):
67 self.writeline(line)
68
69 def writeline(self, line):
70 self.line_number += 1
71 line = line.strip()
72
73 def pop_stack():
74 if not self.stack:
75 self.fail("#" + token + " without matching #if / #ifdef / #ifndef!")
76 return self.stack.pop()
77
78 if self.continuation:
79 line = self.continuation + line
80 self.continuation = None
81
82 if not line:
83 return
84
85 if line.endswith('\\'):
86 self.continuation = line[:-1].rstrip() + " "
87 return
88
89 # we have to ignore preprocessor commands inside comments
90 #
91 # we also have to handle this:
92 # /* start
93 # ...
94 # */ /* <-- tricky!
95 # ...
96 # */
97 # and this:
98 # /* start
99 # ...
100 # */ /* also tricky! */
101 if self.in_comment:
102 if '*/' in line:
103 # snip out the comment and continue
104 #
105 # GCC allows
106 # /* comment
107 # */ #include <stdio.h>
108 # maybe other compilers too?
109 _, _, line = line.partition('*/')
110 self.in_comment = False
111
112 while True:
113 if '/*' in line:
114 if self.in_comment:
115 self.fail("Nested block comment!")
116
117 before, _, remainder = line.partition('/*')
118 comment, comment_ends, after = remainder.partition('*/')
119 if comment_ends:
120 # snip out the comment
121 line = before.rstrip() + ' ' + after.lstrip()
122 continue
123 # comment continues to eol
124 self.in_comment = True
125 line = before.rstrip()
126 break
127
128 # we actually have some // comments
129 # (but block comments take precedence)
130 before, line_comment, comment = line.partition('//')
131 if line_comment:
132 line = before.rstrip()
133
134 if not line.startswith('#'):
135 return
136
137 line = line[1:].lstrip()
138 assert line
139
140 fields = line.split()
141 token = fields[0].lower()
142 condition = ' '.join(fields[1:]).strip()
143
Serhiy Storchaka12446e62020-04-18 17:52:48 +0300144 if token in {'if', 'ifdef', 'ifndef', 'elif'}:
Larry Hastings7726ac92014-01-31 22:03:12 -0800145 if not condition:
146 self.fail("Invalid format for #" + token + " line: no argument!")
Serhiy Storchaka12446e62020-04-18 17:52:48 +0300147 if token in {'if', 'elif'}:
Larry Hastings7726ac92014-01-31 22:03:12 -0800148 if not self.is_a_simple_defined(condition):
149 condition = "(" + condition + ")"
Serhiy Storchaka12446e62020-04-18 17:52:48 +0300150 if token == 'elif':
151 previous_token, previous_condition = pop_stack()
152 self.stack.append((previous_token, negate(previous_condition)))
Larry Hastings7726ac92014-01-31 22:03:12 -0800153 else:
154 fields = condition.split()
155 if len(fields) != 1:
156 self.fail("Invalid format for #" + token + " line: should be exactly one argument!")
157 symbol = fields[0]
158 condition = 'defined(' + symbol + ')'
159 if token == 'ifndef':
160 condition = '!' + condition
Serhiy Storchaka12446e62020-04-18 17:52:48 +0300161 token = 'if'
Larry Hastings7726ac92014-01-31 22:03:12 -0800162
Serhiy Storchaka12446e62020-04-18 17:52:48 +0300163 self.stack.append((token, condition))
164
165 elif token == 'else':
166 previous_token, previous_condition = pop_stack()
167 self.stack.append((previous_token, negate(previous_condition)))
168
169 elif token == 'endif':
170 while pop_stack()[0] != 'if':
171 pass
172
173 else:
Larry Hastings7726ac92014-01-31 22:03:12 -0800174 return
175
Larry Hastings7726ac92014-01-31 22:03:12 -0800176 if self.verbose:
177 print(self.status())
178
if __name__ == '__main__':
    # Command-line use: for every file named on the command line, print a
    # blank line and the file name, then run a verbose Monitor over it so
    # the condition in effect is printed after each conditional directive.
    for path in sys.argv[1:]:
        with open(path, "rt") as source:
            monitor = Monitor(path, verbose=True)
            print()
            print(path)
            for text in source.read().split('\n'):
                monitor.writeline(text)
186 cpp.writeline(line)