eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 1 | #----------------------------------------------------------------- |
| 2 | # pycparser: c-to-c.py |
| 3 | # |
eli.bendersky | afcfaac | 2011-02-25 16:46:01 +0200 | [diff] [blame] | 4 | # Example of a C code generator from pycparser AST nodes, serving |
| 5 | # as a simplistic translator from C to AST and back to C. |
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 6 | # |
| 7 | # Copyright (C) 2008-2011, Eli Bendersky |
| 8 | # License: LGPL |
| 9 | #----------------------------------------------------------------- |
| 10 | from __future__ import print_function |
| 11 | import sys |
| 12 | |
| 13 | # This is not required if you've installed pycparser into |
| 14 | # your site-packages/ with setup.py |
| 15 | # |
| 16 | sys.path.insert(0, '..') |
| 17 | |
| 18 | from pycparser import c_parser, c_ast, parse_file |
| 19 | |
| 20 | |
class CGenerator(object):
    """ Generates C source text from a pycparser AST.

        Uses the same visitor pattern as c_ast.NodeVisitor, but modified to
        return a value from each visit method, using string accumulation in
        generic_visit. Every visit_* method returns the C text generated for
        the node it was given.
    """
    # Class names of nodes whose generated text is atomic and therefore never
    # needs parentheses when it is used as an operand. Names (rather than
    # classes) are used because visit() also dispatches on the class name.
    #
    _SIMPLE_NODE_NAMES = ('Constant', 'ID', 'ArrayRef')

    def __init__(self):
        # Not used by the generator itself; kept so existing users that read
        # the attribute keep working.
        #
        self.output = ''

        # Statements start with indentation of self.indent_level spaces, using
        # the _make_indent method
        #
        self.indent_level = 0

    def _make_indent(self):
        """ Returns the whitespace prefix for the current indent_level.
        """
        return ' ' * self.indent_level

    def visit(self, node):
        """ Dispatches to visit_<ClassName> if such a method exists, and to
            generic_visit otherwise. Returns the generated string.
        """
        method = 'visit_' + node.__class__.__name__
        return getattr(self, method, self.generic_visit)(node)

    def generic_visit(self, node):
        """ Fallback for nodes without a dedicated visit_* method:
            concatenates the text generated for all the children. None
            produces an empty string.
        """
        if node is None:
            return ''
        return ''.join(self.visit(c) for c in node.children())

    def _visit_operand(self, n):
        """ Generates an operand of a larger expression. Anything that is not
            a simple node is wrapped in parentheses, so the grouping recorded
            in the AST survives the translation back to C text.
        """
        if n is None:
            return ''
        s = self.visit(n)
        if n.__class__.__name__ in self._SIMPLE_NODE_NAMES:
            return s
        return '(' + s + ')'

    def visit_Constant(self, n):
        return n.value

    def visit_ID(self, n):
        return n.name

    def visit_ArrayRef(self, n):
        return self.visit(n.name) + '[' + self.visit(n.subscript) + ']'

    def visit_BinaryOp(self, n):
        # Nested operands are parenthesized: the AST encodes grouping in its
        # structure, so emitting them bare could change the meaning of the
        # expression, e.g. (a + b) * c would become a + b * c.
        #
        return '%s %s %s' % (
            self._visit_operand(n.left), n.op, self._visit_operand(n.right))

    def visit_Assignment(self, n):
        return '%s %s %s' % (self.visit(n.lvalue), n.op, self.visit(n.rvalue))

    def visit_IdentifierType(self, n):
        return ' '.join(n.names)

    def visit_Decl(self, n):
        """ A declaration: the declarator, followed by an optional bit-field
            size and an optional initializer. No trailing semicolon.
        """
        s = self._generate_decl(n)
        if n.bitsize: s += ' : ' + self.visit(n.bitsize)
        if n.init: s += ' = ' + self.visit(n.init)
        return s

    def visit_Typedef(self, n):
        s = ''
        if n.storage: s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def visit_Typename(self, n):
        # Without this method the node would fall through to generic_visit,
        # which drops the pointer/array/function modifiers of the type.
        #
        return self._generate_type(n.type)

    def visit_Cast(self, n):
        # The cast binds tighter than binary operators, so a non-trivial
        # operand has to be parenthesized.
        #
        s = '(' + self.visit(n.to_type) + ')'
        return s + ' ' + self._visit_operand(n.expr)

    def visit_Enum(self, n):
        s = 'enum'
        if n.name: s += ' ' + n.name
        if n.values:
            enumerators = []
            for enumerator in n.values.enumerators:
                text = enumerator.name
                if enumerator.value:
                    text += ' = ' + self.visit(enumerator.value)
                enumerators.append(text)
            s += ' {' + ', '.join(enumerators) + '}'
        return s

    def visit_Struct(self, n):
        s = 'struct'
        if n.name: s += ' ' + n.name
        if n.decls:
            s += ' { \n'
            s += ''.join(' ' + self.visit(decl) + ';\n' for decl in n.decls)
            s += '}'
        return s

    def visit_FuncDef(self, n):
        decl = self.visit(n.decl)
        self.indent_level = 0
        # The body is a Compound node
        body = self.visit(n.body)
        return decl + '\n' + body + '\n'

    def visit_FileAST(self, n):
        parts = []
        for ext in n.ext:
            if isinstance(ext, c_ast.FuncDef):
                # Function definitions end with their body - no semicolon
                parts.append(self.visit(ext))
            else:
                parts.append(self.visit(ext) + ';\n')
        return ''.join(parts)

    def visit_Compound(self, n):
        s = self._make_indent() + '{\n'
        self.indent_level += 2
        # block_items may be None for an empty compound statement
        if n.block_items:
            s += ''.join(self._generate_stmt(stmt) for stmt in n.block_items)
        self.indent_level -= 2
        s += self._make_indent() + '}\n'
        return s

    def visit_ParamList(self, n):
        return ', '.join(self.visit(param) for param in n.params)

    def visit_Return(self, n):
        s = 'return'
        if n.expr: s += ' ' + self.visit(n.expr)
        return s + ';'

    def _generate_stmt(self, n):
        """ Generation from a statement node. This method exists as a wrapper
            for individual visit_* methods to handle different treatment of
            some statements in this context.
        """
        if isinstance(n, (c_ast.Decl, c_ast.Assignment, c_ast.Cast)):
            # These can also appear in an expression context so no semicolon
            # is added to them automatically
            #
            return self._make_indent() + self.visit(n) + ';\n'
        elif isinstance(n, c_ast.Compound):
            # No extra indentation required before the opening brace of a
            # compound - because it consists of multiple lines it has to
            # compute its own indentation.
            #
            return self.visit(n) + '\n'
        else:
            return self._make_indent() + self.visit(n) + '\n'

    def _generate_decl(self, n):
        """ Generation from a Decl node: function specifiers and storage
            classes, followed by the declarator generated from n.type.
        """
        s = ''
        if n.funcspec: s = ' '.join(n.funcspec) + ' '
        if n.storage: s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def _generate_type(self, n, modifiers=None):
        """ Recursive generation from a type node. n is the type node.
            modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
            encountered on the way down to a TypeDecl, to allow proper
            generation from it. When omitted, generation starts with no
            modifiers collected.

            Always returns a string: node types that are not handled here
            explicitly are generated with visit().
        """
        if modifiers is None:
            modifiers = []

        if isinstance(n, c_ast.TypeDecl):
            s = ''
            if n.quals: s += ' '.join(n.quals) + ' '
            s += self.visit(n.type)

            nstr = n.declname if n.declname else ''
            # Resolve modifiers.
            # Wrap in parens to distinguish pointer to array and pointer to
            # function syntax.
            #
            for i, modifier in enumerate(modifiers):
                after_ptr = (
                    i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl))
                if isinstance(modifier, c_ast.ArrayDecl):
                    if after_ptr:
                        nstr = '(' + nstr + ')'
                    nstr += '[' + self.visit(modifier.dim) + ']'
                elif isinstance(modifier, c_ast.FuncDecl):
                    if after_ptr:
                        nstr = '(' + nstr + ')'
                    nstr += '(' + self.visit(modifier.args) + ')'
                elif isinstance(modifier, c_ast.PtrDecl):
                    nstr = '*' + nstr
            # Abstract declarators (casts, unnamed parameters) may have no
            # declarator text at all - avoid a dangling space in that case
            #
            if nstr: s += ' ' + nstr
            return s
        elif isinstance(n, c_ast.Decl):
            # _generate_decl reads funcspec and storage, which exist on the
            # Decl itself and not on its type node
            #
            return self._generate_decl(n)
        elif isinstance(n, c_ast.Typename):
            return self._generate_type(n.type)
        elif isinstance(n, c_ast.IdentifierType):
            return ' '.join(n.names) + ' '
        elif isinstance(n, (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl)):
            return self._generate_type(n.type, modifiers + [n])
        else:
            # E.g. a Struct or Enum appearing directly as the type of a Decl
            return self.visit(n)
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 207 | |
| 208 | |
def translate_to_c(filename):
    """ Parses the C file named filename (running it through the C
        preprocessor first) and prints the C code generated back from
        its AST.
    """
    tree = parse_file(filename, use_cpp=True)
    c_text = CGenerator().visit(tree)
    print(c_text)
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 213 | |
| 214 | |
if __name__ == "__main__":
    if sys.argv[1:]:
        # A filename was given on the command line: translate that file.
        translate_to_c(sys.argv[1])
    else:
        # No filename: round-trip a built-in sample (dump its AST, then print
        # the regenerated C) and finish with a usage hint.
        src = r'''

    typedef enum tagReturnCode {SUCCESS=99, FAIL} ReturnCode;


    typedef struct tagEntry
    {
        char* key;
        char* value;
    } Entry;



    typedef struct tagNode
    {
        Entry* entry;

        struct tagNode* next;
    } Node;
    '''
        parser = c_parser.CParser()
        ast = parser.parse(src)
        ast.show()
        generator = CGenerator()
        print(generator.visit(ast))

        print("Please provide a filename as argument")
eli.bendersky | 3ae9f7a | 2011-02-27 07:19:41 +0200 | [diff] [blame^] | 246 | |
| 247 | |
# TODO: operator precedence in expressions is not fully handled. It is
# especially problematic in assignments - where should parentheses be added?
# Maybe only in BinaryOp? Other precedence-sensitive operators (such as cast)
# need parentheses as well.
| 251 | |