eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 1 | #----------------------------------------------------------------- |
| 2 | # pycparser: c-to-c.py |
| 3 | # |
eli.bendersky | afcfaac | 2011-02-25 16:46:01 +0200 | [diff] [blame] | 4 | # Example of a C code generator from pycparser AST nodes, serving |
| 5 | # as a simplistic translator from C to AST and back to C. |
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 6 | # |
| 7 | # Copyright (C) 2008-2011, Eli Bendersky |
| 8 | # License: LGPL |
| 9 | #----------------------------------------------------------------- |
| 10 | from __future__ import print_function |
| 11 | import sys |
| 12 | |
| 13 | # This is not required if you've installed pycparser into |
| 14 | # your site-packages/ with setup.py |
| 15 | # |
| 16 | sys.path.insert(0, '..') |
| 17 | |
| 18 | from pycparser import c_parser, c_ast, parse_file |
| 19 | |
| 20 | |
class CGenerator(object):
    """ Uses the same visitor pattern as c_ast.NodeVisitor, but modified to
        return a value from each visit method, using string accumulation in
        generic_visit.

        Every visit_* method returns the C source text generated for the
        node it receives. Nodes without a dedicated visit_* method are
        handled by generic_visit, which concatenates the text generated
        for their children.
    """
    def __init__(self):
        self.output = ''

        # Statements start with indentation of self.indent_level spaces, using
        # the _make_indent method
        #
        self.indent_level = 0

    def _make_indent(self):
        """ Return the whitespace prefix for the current indent level.
        """
        return ' ' * self.indent_level

    def visit(self, node):
        """ Dispatch to visit_<ClassName> if this generator defines it,
            otherwise to generic_visit. Returns the generated string.
        """
        method = 'visit_' + node.__class__.__name__
        return getattr(self, method, self.generic_visit)(node)

    def generic_visit(self, node):
        """ Fallback visitor: returns '' for None, otherwise the
            concatenation of the strings generated for node's children.
        """
        if node is None:
            return ''

        parts = []
        for child in node.children():
            # NOTE(review): some pycparser versions return (name, node)
            # pairs from children() instead of bare nodes - accept both
            # forms so the generator works with either.
            #
            if isinstance(child, tuple):
                child = child[-1]
            parts.append(self.visit(child))
        return ''.join(parts)

    def visit_Constant(self, n):
        return n.value

    def visit_ID(self, n):
        return n.name

    def visit_ArrayRef(self, n):
        return self.visit(n.name) + '[' + self.visit(n.subscript) + ']'

    def visit_UnaryOp(self, n):
        """ Postfix increment/decrement are encoded in n.op as 'p++' and
            'p--' and must be emitted after the operand. sizeof needs
            parentheses (or at least a separator) around its operand.
            Every other operator is emitted as a prefix.
        """
        operand = self.visit(n.expr)
        if n.op in ('p++', 'p--'):
            # Strip the leading 'p' marker to obtain the real operator
            return '%s%s' % (operand, n.op[1:])
        elif n.op == 'sizeof':
            return 'sizeof(%s)' % operand
        else:
            return '%s%s' % (n.op, operand)

    def visit_BinaryOp(self, n):
        return '%s %s %s' % (self.visit(n.left), n.op, self.visit(n.right))

    def visit_Assignment(self, n):
        return '%s %s %s' % (self.visit(n.lvalue), n.op, self.visit(n.rvalue))

    def visit_IdentifierType(self, n):
        return ' '.join(n.names)

    def visit_Decl(self, n):
        """ A declaration, with its optional bit-field size and
            initializer. No trailing semicolon is added here.
        """
        s = self._generate_decl(n)
        if n.bitsize: s += ' : ' + self.visit(n.bitsize)
        if n.init: s += ' = ' + self.visit(n.init)
        return s

    def visit_Typedef(self, n):
        s = ''
        if n.storage: s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def visit_Cast(self, n):
        s = '(' + self.visit(n.to_type) + ')'
        return s + ' ' + self.visit(n.expr)

    def visit_Enum(self, n):
        """ 'enum [name]' followed, when the enumerator list is present,
            by ' {A, B = value, ...}'.
        """
        s = 'enum'
        if n.name: s += ' ' + n.name
        if n.values:
            items = []
            for enumerator in n.values.enumerators:
                item = enumerator.name
                if enumerator.value:
                    item += ' = ' + self.visit(enumerator.value)
                items.append(item)
            s += ' {' + ', '.join(items) + '}'
        return s

    def visit_Struct(self, n):
        """ 'struct [name]' followed, when member declarations are
            present, by a brace-enclosed list with one member per line.
        """
        s = 'struct'
        if n.name: s += ' ' + n.name
        if n.decls:
            s += ' { \n'
            for decl in n.decls:
                s += ' ' + self.visit(decl) + ';\n'
            s += '}'
        return s

    def visit_FuncDef(self, n):
        decl = self.visit(n.decl)
        # A function body always starts at the leftmost column
        self.indent_level = 0
        # The body is a Compound node
        body = self.visit(n.body)
        return decl + '\n' + body + '\n'

    def visit_FileAST(self, n):
        """ Top level of the translation unit: function definitions are
            emitted as they are, everything else is terminated with a
            semicolon.
        """
        s = ''
        for ext in n.ext:
            if isinstance(ext, c_ast.FuncDef):
                s += self.visit(ext)
            else:
                s += self.visit(ext) + ';\n'
        return s

    def visit_Compound(self, n):
        s = self._make_indent() + '{\n'
        self.indent_level += 2
        # block_items may be None (e.g. for an empty '{}' body), in which
        # case there is nothing to generate between the braces.
        #
        block_items = n.block_items or []
        s += ''.join(self._generate_stmt(stmt) for stmt in block_items)
        self.indent_level -= 2
        s += self._make_indent() + '}\n'
        return s

    def visit_ParamList(self, n):
        return ', '.join(self.visit(param) for param in n.params)

    def visit_Return(self, n):
        s = 'return'
        if n.expr: s += ' ' + self.visit(n.expr)
        return s + ';'

    def visit_For(self, n):
        s = 'for ('
        if n.init: s += self.visit(n.init)
        s += ';'
        if n.cond: s += ' ' + self.visit(n.cond)
        s += ';'
        if n.next: s += ' ' + self.visit(n.next)
        s += ')\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def _generate_stmt(self, n, add_indent=False):
        """ Generation from a statement node. This method exists as a wrapper
            for individual visit_* methods to handle different treatment of
            some statements in this context.

            When add_indent is true, the statement's own line is indented
            one level (2 spaces) deeper than the current indent level.
        """
        typ = type(n)
        # The extra level only applies to the prefix of this statement, so
        # indent_level is restored right after computing it.
        #
        if add_indent: self.indent_level += 2
        indent = self._make_indent()
        if add_indent: self.indent_level -= 2

        if typ in (c_ast.Decl, c_ast.Assignment, c_ast.Cast, c_ast.UnaryOp,
                   c_ast.BinaryOp):
            # These can also appear in an expression context so no semicolon
            # is added to them automatically
            #
            return indent + self.visit(n) + ';\n'
        elif typ in (c_ast.Compound,):
            # No extra indentation required before the opening brace of a
            # compound - because it consists of multiple lines it has to
            # compute its own indentation.
            #
            return self.visit(n)
        else:
            return indent + self.visit(n) + '\n'

    def _generate_decl(self, n):
        """ Generation from a Decl node: function specifiers and storage
            class specifiers, followed by the declared type and name.
        """
        s = ''
        if n.funcspec: s = ' '.join(n.funcspec) + ' '
        if n.storage: s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def _generate_type(self, n, modifiers=None):
        """ Recursive generation from a type node. n is the type node.
            modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
            encountered on the way down to a TypeDecl, to allow proper
            generation from it.
        """
        # A None default avoids sharing one list object between calls
        if modifiers is None:
            modifiers = []
        typ = type(n)

        if typ == c_ast.TypeDecl:
            s = ''
            if n.quals: s += ' '.join(n.quals) + ' '
            s += self.visit(n.type)

            nstr = n.declname if n.declname else ''
            # Resolve modifiers.
            # Wrap in parens to distinguish pointer to array and pointer to
            # function syntax.
            #
            for i, modifier in enumerate(modifiers):
                if isinstance(modifier, c_ast.ArrayDecl):
                    if (i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl)):
                        nstr = '(' + nstr + ')'
                    nstr += '[' + self.visit(modifier.dim) + ']'
                elif isinstance(modifier, c_ast.FuncDecl):
                    if (i != 0 and isinstance(modifiers[i - 1], c_ast.PtrDecl)):
                        nstr = '(' + nstr + ')'
                    nstr += '(' + self.visit(modifier.args) + ')'
                elif isinstance(modifier, c_ast.PtrDecl):
                    nstr = '*' + nstr
            s += ' ' + nstr
            return s
        elif typ == c_ast.Decl:
            # _generate_decl reads funcspec/storage, which live on the Decl
            # itself and not on the type node it wraps.
            #
            return self._generate_decl(n)
        elif typ == c_ast.Typename:
            return self._generate_type(n.type)
        elif typ == c_ast.IdentifierType:
            return ' '.join(n.names) + ' '
        elif typ in (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl):
            return self._generate_type(n.type, modifiers + [n])
        else:
            # Any other node (e.g. a Struct or Enum used directly as the
            # declared type) is generated by its own visitor, so that a
            # string is always returned.
            #
            return self.visit(n)
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 227 | |
| 228 | |
def translate_to_c(filename):
    """ Parse the C file named by filename with parse_file (with
        use_cpp enabled), regenerate C code from the resulting AST and
        print it to standard output.
    """
    tree = parse_file(filename, use_cpp=True)
    print(CGenerator().visit(tree))
eli.bendersky | d5ba345 | 2011-02-18 21:32:47 +0200 | [diff] [blame] | 233 | |
| 234 | |
if __name__ == "__main__":
    cmdline_args = sys.argv[1:]
    if cmdline_args:
        # A file name was given: translate that file
        translate_to_c(cmdline_args[0])
    else:
        # No file name: run the generator on a built-in sample instead,
        # showing both the parsed AST and the regenerated C code.
        #
        src = r'''
    static unsigned int hash_func(const char* str, unsigned int table_size)
    {
        unsigned int hash_value;
        unsigned int a = 127;
        a++;
        ++a;

        for (hash_value = 0; *str != 0; ++str)
            {hash_value = (a*hash_value + *str) % table_size;}

        return hash_value;
    }

    '''
        sample_ast = c_parser.CParser().parse(src)
        sample_ast.show()
        print(CGenerator().visit(sample_ast))

        print("Please provide a filename as argument")
eli.bendersky | 3ae9f7a | 2011-02-27 07:19:41 +0200 | [diff] [blame] | 261 | |
| 262 | |
# TODO: operator precedence in expressions is not handled yet. It is
# especially problematic in assignments - where should parentheses be
# added? Maybe just in BinaryOp? Other precedence-sensitive operators
# (such as cast) need parentheses as well.
| 266 | |