eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 1 | #----------------------------------------------------------------- |
| 2 | # pycparser: c-to-c.py |
| 3 | # |
eli.bendersky | afcfaac | 2011-02-25 16:46:01 +0200 | [diff] [blame] | 4 | # Example of a C code generator from pycparser AST nodes, serving |
| 5 | # as a simplistic translator from C to AST and back to C. |
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 6 | # |
| 7 | # Copyright (C) 2008-2011, Eli Bendersky |
| 8 | # License: LGPL |
| 9 | #----------------------------------------------------------------- |
| 10 | from __future__ import print_function |
| 11 | import sys |
| 12 | |
| 13 | # This is not required if you've installed pycparser into |
| 14 | # your site-packages/ with setup.py |
| 15 | # |
| 16 | sys.path.insert(0, '..') |
| 17 | |
| 18 | from pycparser import c_parser, c_ast, parse_file |
| 19 | |
| 20 | |
class CGenerator(object):
    """ Generates C source text from pycparser AST nodes.

        Uses the same visitor pattern as c_ast.NodeVisitor, but modified to
        return a value from each visit method, using string accumulation in
        generic_visit.
    """
    # Class names of nodes whose generated text can be used as an operand
    # without surrounding parentheses. Compared by class name, the same way
    # visit() dispatches to its visit_* methods.
    #
    _SIMPLE_NODE_NAMES = frozenset(
        ['Constant', 'ID', 'ArrayRef', 'StructRef', 'FuncCall'])

    def __init__(self):
        # Not used by the visit_* methods below - they return their text
        # instead of accumulating it here.
        #
        self.output = ''

        # Statements start with indentation of self.indent_level spaces, using
        # the _make_indent method
        #
        self.indent_level = 0

    def _make_indent(self):
        """ Returns the whitespace prefix for the current indent level.
        """
        return ' ' * self.indent_level

    def _parenthesize_unless_simple(self, n):
        """ Returns the text generated for n, wrapped in parentheses unless
            n is one of the simple node kinds that can never be split apart
            by a neighbouring operator.
        """
        text = self.visit(n)
        if n is None or type(n).__name__ in self._SIMPLE_NODE_NAMES:
            return text
        return '(' + text + ')'

    def visit(self, node):
        """ Dispatches to visit_<ClassName> for the node, falling back to
            generic_visit when no such method exists. Returns the generated
            string.
        """
        method = 'visit_' + node.__class__.__name__
        return getattr(self, method, self.generic_visit)(node)

    def generic_visit(self, node):
        """ Returns the concatenation of the text generated for all the
            children of node, or an empty string when node is None.
        """
        if node is None:
            return ''
        return ''.join(self.visit(c) for c in node.children())

    def visit_Constant(self, n):
        return n.value

    def visit_ID(self, n):
        return n.name

    def visit_Typename(self, n):
        # Go through _generate_type so that pointer, array and function
        # modifiers of an abstract type (a cast target, an unnamed parameter)
        # are kept.
        #
        return self._generate_type(n.type)

    def visit_ArrayRef(self, n):
        # The indexed expression may itself be compound, e.g. (*p)[3]
        #
        return '%s[%s]' % (self._parenthesize_unless_simple(n.name),
                           self.visit(n.subscript))

    def visit_UnaryOp(self, n):
        if n.op == 'sizeof':
            # Always parenthesized, which is valid both for an expression
            # operand and for a type name operand.
            #
            return 'sizeof(%s)' % self.visit(n.expr)

        operand = self._parenthesize_unless_simple(n.expr)
        if n.op in ('p++', 'p--'):
            # Postfix operators arrive with a 'p' prefix on the operator
            #
            return operand + n.op[1:]
        return n.op + operand

    def visit_BinaryOp(self, n):
        # Compound operands are parenthesized so that the grouping recorded
        # in the AST survives regardless of operator precedence.
        #
        return '%s %s %s' % (self._parenthesize_unless_simple(n.left),
                             n.op,
                             self._parenthesize_unless_simple(n.right))

    def visit_Assignment(self, n):
        return '%s %s %s' % (self.visit(n.lvalue), n.op, self.visit(n.rvalue))

    def visit_IdentifierType(self, n):
        return ' '.join(n.names)

    def visit_Decl(self, n):
        s = self._generate_decl(n)
        if n.bitsize: s += ' : ' + self.visit(n.bitsize)
        if n.init: s += ' = ' + self.visit(n.init)
        return s

    def visit_Typedef(self, n):
        s = ''
        if n.storage: s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def visit_Cast(self, n):
        # A cast binds tighter than most operators, so a compound operand
        # has to be parenthesized to stay inside the cast.
        #
        s = '(' + self.visit(n.to_type) + ')'
        return s + ' ' + self._parenthesize_unless_simple(n.expr)

    def visit_Enum(self, n):
        s = 'enum'
        if n.name: s += ' ' + n.name
        if n.values:
            items = []
            for enumerator in n.values.enumerators:
                item = enumerator.name
                if enumerator.value:
                    item += ' = ' + self.visit(enumerator.value)
                items.append(item)
            s += ' {' + ', '.join(items) + '}'
        return s

    def visit_Struct(self, n):
        s = 'struct'
        if n.name: s += ' ' + n.name
        if n.decls:
            s += ' { \n'
            s += ''.join(' ' + self.visit(decl) + ';\n' for decl in n.decls)
            s += '}'
        return s

    def visit_FuncDef(self, n):
        decl = self.visit(n.decl)
        # Function bodies always start at the left margin
        #
        self.indent_level = 0
        # The body is a Compound node
        body = self.visit(n.body)
        return decl + '\n' + body + '\n'

    def visit_FileAST(self, n):
        parts = []
        for ext in n.ext:
            if isinstance(ext, c_ast.FuncDef):
                # A function definition ends with its closing brace
                parts.append(self.visit(ext))
            else:
                parts.append(self.visit(ext) + ';\n')
        return ''.join(parts)

    def visit_Compound(self, n):
        s = self._make_indent() + '{\n'
        self.indent_level += 2
        try:
            # block_items may be None (assumed to mean an empty compound)
            #
            s += ''.join(self._generate_stmt(stmt)
                         for stmt in (n.block_items or []))
        finally:
            self.indent_level -= 2
        s += self._make_indent() + '}\n'
        return s

    def visit_ParamList(self, n):
        return ', '.join(self.visit(param) for param in n.params)

    def visit_Return(self, n):
        s = 'return'
        if n.expr: s += ' ' + self.visit(n.expr)
        return s + ';'

    def visit_Break(self, n):
        return 'break;'

    def visit_Continue(self, n):
        return 'continue;'

    def visit_For(self, n):
        s = 'for ('
        if n.init: s += self.visit(n.init)
        s += ';'
        if n.cond: s += ' ' + self.visit(n.cond)
        s += ';'
        if n.next: s += ' ' + self.visit(n.next)
        s += ')\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_While(self, n):
        s = 'while ('
        if n.cond: s += self.visit(n.cond)
        s += ')\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def _generate_stmt(self, n, add_indent=False):
        """ Generation from a statement node. This method exists as a wrapper
            for individual visit_* methods to handle different treatment of
            some statements in this context.

            When add_indent is true the statement is generated one indent
            step (2 spaces) deeper than the current level, as needed for
            the body of a loop.
        """
        if isinstance(n, c_ast.Compound):
            # No extra indentation required before the opening brace of a
            # compound - because it consists of multiple lines it has to
            # compute its own indentation.
            #
            return self.visit(n)

        # The deeper level stays in effect while n is visited, so that
        # statements nested inside n are indented relative to n itself.
        #
        if add_indent: self.indent_level += 2
        try:
            text = self._make_indent() + self.visit(n)
        finally:
            if add_indent: self.indent_level -= 2

        if isinstance(n, (c_ast.Decl, c_ast.Assignment, c_ast.Cast,
                          c_ast.UnaryOp, c_ast.BinaryOp)):
            # These can also appear in an expression context so no semicolon
            # is added to them automatically
            #
            return text + ';\n'
        return text + '\n'

    def _generate_decl(self, n):
        """ Generation from a Decl node: function specifiers and storage
            class specifiers followed by the declared type and name.
        """
        s = ''
        if n.funcspec: s = ' '.join(n.funcspec) + ' '
        if n.storage: s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def _generate_type(self, n, modifiers=None):
        """ Recursive generation from a type node. n is the type node.
            modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
            encountered on the way down to a TypeDecl, to allow proper
            generation from it.
        """
        if modifiers is None:
            modifiers = []

        if isinstance(n, c_ast.TypeDecl):
            s = ''
            if n.quals: s += ' '.join(n.quals) + ' '
            s += self.visit(n.type)

            nstr = n.declname if n.declname else ''
            # Resolve modifiers.
            # Wrap in parens to distinguish pointer to array and pointer to
            # function syntax.
            #
            for i, modifier in enumerate(modifiers):
                after_ptr = (i != 0 and
                             isinstance(modifiers[i - 1], c_ast.PtrDecl))
                if isinstance(modifier, c_ast.ArrayDecl):
                    if after_ptr: nstr = '(' + nstr + ')'
                    nstr += '[' + self.visit(modifier.dim) + ']'
                elif isinstance(modifier, c_ast.FuncDecl):
                    if after_ptr: nstr = '(' + nstr + ')'
                    nstr += '(' + self.visit(modifier.args) + ')'
                elif isinstance(modifier, c_ast.PtrDecl):
                    nstr = '*' + nstr
            # An unnamed type without modifiers has no declarator text, so
            # no separating space is emitted for it.
            #
            if nstr: s += ' ' + nstr
            return s
        elif isinstance(n, c_ast.Decl):
            # funcspec and storage live on the Decl itself, not on its type
            #
            return self._generate_decl(n)
        elif isinstance(n, c_ast.Typename):
            return self._generate_type(n.type)
        elif isinstance(n, c_ast.IdentifierType):
            return ' '.join(n.names) + ' '
        elif isinstance(n, (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl)):
            return self._generate_type(n.type, modifiers + [n])
        else:
            # Any other node generates itself through the regular dispatch
            #
            return self.visit(n)
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 240 | |
| 241 | |
def translate_to_c(filename):
    """ Parses the C file named by filename (with the C preprocessor enabled)
        and prints the C code regenerated from the resulting AST.
    """
    tree = parse_file(filename, use_cpp=True)
    print(CGenerator().visit(tree))
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 246 | |
| 247 | |
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        # No input file was given: run the generator on a built-in sample,
        # dump its AST and the regenerated code, then print a usage hint.
        #
        src = r'''
static unsigned int hash_func(const char* str, unsigned int table_size)
{
    unsigned int hash_value;
    unsigned int a = 127;
    a++;
    ++a;

    while (hash_value == 0) {
        hash_value = (a*hash_value + *str) % table_size;
        break;
    }

    return hash_value;
}

'''
        sample_ast = c_parser.CParser().parse(src)
        sample_ast.show()
        print(CGenerator().visit(sample_ast))

        print("Please provide a filename as argument")
    else:
        translate_to_c(sys.argv[1])
eli.bendersky | 3ae9f7a | 2011-02-27 07:19:41 +0200 | [diff] [blame] | 276 | |
| 277 | |
# TODO: review operator precedence in generated expressions - this is
# especially problematic in assignments. Decide where parentheses must be
# emitted (maybe only in BinaryOp?). Other precedence-sensitive operators
# (such as casts) need parentheses as well.
| 281 | |
# TODO: turn the paired self.indent_level += 2 ... -= 2 adjustments into a
# context manager.
| 283 | |