Larry Hastings | 7726ac9 | 2014-01-31 22:03:12 -0800 | [diff] [blame] | 1 | import re |
| 2 | import sys |
| 3 | |
def negate(condition):
    """
    Return the CPP conditional that is the logical opposite of `condition`.

    A leading '!' is removed if there is one; otherwise a '!' is prepended.
    No parentheses are added, so the caller is expected to pass a condition
    that is already a single unit such as "defined(X)" or "(A && B)".
    """
    return condition[1:] if condition.startswith('!') else "!" + condition
| 11 | |
class Monitor:
    """
    A simple C preprocessor scanner that reads C source line by line and
    tracks the current #if / #ifdef / #ifndef nesting state.

    Feed it source with write() or writeline(); ask for the current state
    with condition(); call close() at end of file to check that every
    conditional block was terminated.

    Known limitations: comment markers inside C string literals are not
    understood.  For example a /* inside a string without a matching */ will
    make the scanner believe it is inside a block comment.  A #elif simply
    replaces the condition of the branch it follows instead of combining it
    with the negation of the earlier branches.
    """

    # Matches a condition that is exactly one "defined(NAME)" test; such
    # conditions are stored as-is instead of being wrapped in parentheses.
    is_a_simple_defined = re.compile(r'^defined\s*\(\s*[A-Za-z0-9_]+\s*\)$').match

    # Directives that open a conditional block, and all directives we track.
    _if_tokens = frozenset(('if', 'ifdef', 'ifndef'))
    _all_tokens = _if_tokens | frozenset(('elif', 'else', 'endif'))

    def __init__(self, filename=None, *, verbose=False):
        """
        filename -- optional name used only in error messages.
        verbose  -- when true, print status() after every tracked directive.
        """
        # list of (token, condition) pairs, outermost conditional first
        self.stack = []
        # true while scanning inside a /* ... */ comment spanning lines
        self.in_comment = False
        # text of a backslash-continued line waiting for its next part
        self.continuation = None
        self.line_number = 0
        self.filename = filename
        self.verbose = verbose

    def __repr__(self):
        return ''.join((
            '<Monitor ',
            str(id(self)),
            " line=", str(self.line_number),
            " condition=", repr(self.condition()),
            ">"))

    def status(self):
        """
        Returns "<line number right-justified to 4 columns>: <condition>".
        """
        return str(self.line_number).rjust(4) + ": " + self.condition()

    def condition(self):
        """
        Returns the current preprocessor state, as a single #if condition.

        The conditions of all open blocks are joined with " && "; the result
        is the empty string when no conditional block is open.
        """
        return " && ".join(condition for _, condition in self.stack)

    def fail(self, *a):
        """
        Prints an error message naming the file (if known) and the current
        line, followed by the arguments joined with spaces, then exits the
        process via sys.exit(-1).
        """
        if self.filename:
            filename = " " + self.filename
        else:
            filename = ''
        print("Error at" + filename, "line", self.line_number, ":")
        print("   ", ' '.join(str(x) for x in a))
        sys.exit(-1)

    def close(self):
        """
        Signals end of input; fails if a conditional block is still open.
        """
        if self.stack:
            self.fail("Ended file while still in a preprocessor conditional block!")

    def write(self, s):
        """
        Feeds the string s to writeline(), one "\\n"-separated line at a time.
        """
        for line in s.split("\n"):
            self.writeline(line)

    def writeline(self, line):
        """
        Processes one line of C source and updates the conditional stack.

        Increments line_number, joins backslash-continued lines, removes
        comments, and then, if what is left is a #if / #ifdef / #ifndef /
        #elif / #else / #endif directive, pushes or pops self.stack
        accordingly.  Malformed or unbalanced directives are reported
        through fail().
        """
        self.line_number += 1
        line = line.strip()

        def pop_stack():
            # "token" is bound later in writeline(), before any call here.
            if not self.stack:
                self.fail("#" + token + " without matching #if / #ifdef / #ifndef!")
            return self.stack.pop()

        if self.continuation:
            line = self.continuation + line
            self.continuation = None

        if not line:
            return

        if line.endswith('\\'):
            # hold the text back until the rest of the logical line arrives
            self.continuation = line[:-1].rstrip() + " "
            return

        # we have to ignore preprocessor commands inside comments
        #
        # we also have to handle this:
        #     /* start
        #     ...
        #     */   /*    <-- tricky!
        #     ...
        #     */
        # and this:
        #     /* start
        #     ...
        #     */   /* also tricky! */
        if self.in_comment:
            if '*/' not in line:
                if '/*' in line:
                    self.fail("Nested block comment!")
                # The whole line is comment text, so anything on it that
                # looks like a directive must be ignored.
                return
            # snip out the end of the comment and keep going
            #
            # GCC allows
            #    /* comment
            #    */   #include <stdio.h>
            # maybe other compilers too?
            _, _, line = line.partition('*/')
            self.in_comment = False

        # remove every block comment that starts on this line
        while '/*' in line:
            before, _, remainder = line.partition('/*')
            _, comment_ends, after = remainder.partition('*/')
            if not comment_ends:
                # comment continues to eol
                self.in_comment = True
                line = before.rstrip()
                break
            # snip out the comment
            line = before.rstrip() + ' ' + after.lstrip()

        # we actually have some // comments
        # (but block comments take precedence)
        before, line_comment, _ = line.partition('//')
        if line_comment:
            line = before

        # Snipping comments can leave whitespace in front of the '#';
        # strip it so a directive that follows a comment is still seen.
        line = line.strip()

        if not line.startswith('#'):
            return

        line = line[1:].lstrip()
        if not line:
            # a lone '#' is a null directive: valid C, nothing to track
            return

        fields = line.split()
        token = fields[0].lower()
        condition = ' '.join(fields[1:])

        if token not in self._all_tokens:
            return

        # cheat a little here, to reuse the implementation of if
        if token == 'elif':
            pop_stack()
            token = 'if'

        if token in self._if_tokens:
            if not condition:
                self.fail("Invalid format for #" + token + " line: no argument!")
            if token == 'if':
                # parenthesize anything but a lone defined(X) so that the
                # " && " join in condition() stays unambiguous
                if not self.is_a_simple_defined(condition):
                    condition = "(" + condition + ")"
            else:
                fields = condition.split()
                if len(fields) != 1:
                    self.fail("Invalid format for #" + token + " line: should be exactly one argument!")
                symbol = fields[0]
                condition = 'defined(' + symbol + ')'
                if token == 'ifndef':
                    condition = '!' + condition

            self.stack.append(("if", condition))
            if self.verbose:
                print(self.status())
            return

        # only #else and #endif get here; both close the current branch
        _, previous_condition = pop_stack()

        if token == 'else':
            self.stack.append(('else', negate(previous_condition)))
        if self.verbose:
            print(self.status())
| 183 | |
if __name__ == '__main__':
    # Print the preprocessor state after every tracked directive of each
    # file named on the command line.
    for filename in sys.argv[1:]:
        with open(filename, "rt") as source:
            monitor = Monitor(filename, verbose=True)
            print()
            print(filename)
            # Monitor.write() splits on "\n" and feeds each line to writeline()
            monitor.write(source.read())