blob: f79d204f25f0d4d499005afb8a2972907adf0a5a [file] [log] [blame]
#-----------------------------------------------------------------
# pycparser: c-to-c.py
#
# Example of a C code generator from pycparser AST nodes, serving
# as a simplistic translator from C to AST and back to C.
#
# Copyright (C) 2008-2011, Eli Bendersky
# License: LGPL
#-----------------------------------------------------------------
10from __future__ import print_function
11import sys
12
# This is not required if you've installed pycparser into
# your site-packages/ with setup.py
#
16sys.path.insert(0, '..')
17
18from pycparser import c_parser, c_ast, parse_file
19
20
class CGenerator(object):
    """ Uses the same visitor pattern as c_ast.NodeVisitor, but modified to
        return a value from each visit method, using string accumulation in
        generic_visit.

        Every visit_* method returns the C source text for the node it is
        given; visit() dispatches on the node's class name.
    """
    # Class names of nodes whose generated text can be used as an operand
    # without wrapping it in parentheses. Matching is done by class name,
    # the same way visit() picks its visit_* method.
    #
    _SIMPLE_NODE_NAMES = ('Constant', 'ID', 'ArrayRef', 'StructRef',
                          'FuncCall')

    def __init__(self):
        self.output = ''

        # Statements start with indentation of self.indent_level spaces, using
        # the _make_indent method
        #
        self.indent_level = 0

    def _make_indent(self):
        """ Return the whitespace prefix for the current indentation level.
        """
        return ' ' * self.indent_level

    def visit(self, node):
        """ Dispatch to visit_<ClassName> for the node, falling back to
            generic_visit when no such method exists. Returns a string.
        """
        method = 'visit_' + node.__class__.__name__
        return getattr(self, method, self.generic_visit)(node)

    def generic_visit(self, node):
        """ Fallback visitor: concatenate the text generated for all the
            children of the node. A None node generates an empty string.
        """
        if node is None:
            return ''
        return ''.join(self.visit(c) for c in node.children())

    def _visit_operand(self, n):
        """ Generate n for use as an operand of another operator. Anything
            that is not a simple node (see _SIMPLE_NODE_NAMES) is wrapped in
            parentheses so that the grouping encoded in the AST survives the
            trip back to source text.
        """
        text = self.visit(n)
        if type(n).__name__ in self._SIMPLE_NODE_NAMES:
            return text
        return '(' + text + ')'

    def visit_Constant(self, n):
        return n.value

    def visit_ID(self, n):
        return n.name

    def visit_ArrayRef(self, n):
        return self.visit(n.name) + '[' + self.visit(n.subscript) + ']'

    def visit_UnaryOp(self, n):
        """ Unary operators. 'p++' and 'p--' are the postfix forms; 'sizeof'
            is emitted in its function-like form; everything else is a
            prefix operator.
        """
        if n.op == 'sizeof':
            return 'sizeof(%s)' % self.visit(n.expr)

        operand = self._visit_operand(n.expr)
        if n.op in ('p++', 'p--'):
            # Drop the leading 'p' marker and emit the operator as a suffix
            return operand + n.op[1:]
        return '%s%s' % (n.op, operand)

    def visit_BinaryOp(self, n):
        # Compound operands are parenthesized: without this,
        # (a + b) % c would be regenerated as a + b % c.
        #
        return '%s %s %s' % (self._visit_operand(n.left),
                             n.op,
                             self._visit_operand(n.right))

    def visit_Assignment(self, n):
        return '%s %s %s' % (self.visit(n.lvalue), n.op, self.visit(n.rvalue))

    def visit_IdentifierType(self, n):
        return ' '.join(n.names)

    def visit_Decl(self, n):
        s = self._generate_decl(n)
        if n.bitsize: s += ' : ' + self.visit(n.bitsize)
        if n.init: s += ' = ' + self.visit(n.init)
        return s

    def visit_Typedef(self, n):
        s = ''
        if n.storage: s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def visit_Cast(self, n):
        # A cast binds tighter than binary operators, so a compound
        # expression being cast has to be parenthesized.
        #
        s = '(' + self.visit(n.to_type) + ')'
        return s + ' ' + self._visit_operand(n.expr)

    def visit_Enum(self, n):
        s = 'enum'
        if n.name: s += ' ' + n.name
        if n.values:
            items = []
            for enumerator in n.values.enumerators:
                item = enumerator.name
                if enumerator.value:
                    item += ' = ' + self.visit(enumerator.value)
                items.append(item)
            s += ' {' + ', '.join(items) + '}'
        return s

    def visit_Struct(self, n):
        s = 'struct'
        if n.name: s += ' ' + n.name
        if n.decls:
            s += ' { \n'
            for decl in n.decls:
                s += ' ' + self.visit(decl) + ';\n'
            s += '}'
        return s

    def visit_FuncDef(self, n):
        decl = self.visit(n.decl)
        # Function definitions always start at the left margin
        self.indent_level = 0
        # The body is a Compound node
        body = self.visit(n.body)
        return decl + '\n' + body + '\n'

    def visit_FileAST(self, n):
        s = ''
        for ext in n.ext:
            if isinstance(ext, c_ast.FuncDef):
                s += self.visit(ext)
            else:
                # Everything else at file level is terminated explicitly
                s += self.visit(ext) + ';\n'
        return s

    def visit_Compound(self, n):
        s = self._make_indent() + '{\n'
        self.indent_level += 2
        # Guard against block_items being None (no statements in the block)
        s += ''.join(self._generate_stmt(stmt)
                     for stmt in (n.block_items or []))
        self.indent_level -= 2
        s += self._make_indent() + '}\n'
        return s

    def visit_ParamList(self, n):
        return ', '.join(self.visit(param) for param in n.params)

    def visit_Return(self, n):
        s = 'return'
        if n.expr: s += ' ' + self.visit(n.expr)
        return s + ';'

    def visit_Break(self, n):
        return 'break;'

    def visit_Continue(self, n):
        return 'continue;'

    def visit_For(self, n):
        s = 'for ('
        if n.init: s += self.visit(n.init)
        s += ';'
        if n.cond: s += ' ' + self.visit(n.cond)
        s += ';'
        if n.next: s += ' ' + self.visit(n.next)
        s += ')\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_While(self, n):
        s = 'while ('
        if n.cond: s += self.visit(n.cond)
        s += ')\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def _generate_stmt(self, n, add_indent=False):
        """ Generation from a statement node. This method exists as a wrapper
            for individual visit_* methods to handle different treatment of
            some statements in this context.

            When add_indent is true the statement's first line is indented
            two spaces deeper than the current level (used for loop bodies).
        """
        typ = type(n)
        indent = ' ' * (self.indent_level + (2 if add_indent else 0))

        if typ in (c_ast.Decl, c_ast.Assignment, c_ast.Cast, c_ast.UnaryOp,
                   c_ast.BinaryOp):
            # These can also appear in an expression context so no semicolon
            # is added to them automatically
            #
            return indent + self.visit(n) + ';\n'
        elif typ in (c_ast.Compound,):
            # No extra indentation required before the opening brace of a
            # compound - because it consists of multiple lines it has to
            # compute its own indentation.
            #
            return self.visit(n)
        else:
            return indent + self.visit(n) + '\n'

    def _generate_decl(self, n):
        """ Generation from a Decl node: function specifiers, then storage
            specifiers, then the declared type (which carries the name).
        """
        s = ''
        if n.funcspec: s = ' '.join(n.funcspec) + ' '
        if n.storage: s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def _generate_type(self, n, modifiers=None):
        """ Recursive generation from a type node. n is the type node.
            modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
            encountered on the way down to a TypeDecl, to allow proper
            generation from it.
        """
        # None instead of a shared mutable [] default
        if modifiers is None:
            modifiers = []
        typ = type(n)

        if typ == c_ast.TypeDecl:
            s = ''
            if n.quals: s += ' '.join(n.quals) + ' '
            s += self.visit(n.type)

            nstr = n.declname if n.declname else ''
            # Resolve modifiers.
            # Wrap in parens to distinguish pointer to array and pointer to
            # function syntax.
            #
            for i, modifier in enumerate(modifiers):
                after_ptr = (i != 0 and
                             isinstance(modifiers[i - 1], c_ast.PtrDecl))
                if isinstance(modifier, c_ast.ArrayDecl):
                    if after_ptr:
                        nstr = '(' + nstr + ')'
                    nstr += '[' + self.visit(modifier.dim) + ']'
                elif isinstance(modifier, c_ast.FuncDecl):
                    if after_ptr:
                        nstr = '(' + nstr + ')'
                    nstr += '(' + self.visit(modifier.args) + ')'
                elif isinstance(modifier, c_ast.PtrDecl):
                    nstr = '*' + nstr
            s += ' ' + nstr
            return s
        elif typ in (c_ast.Typename, c_ast.Decl):
            # NOTE(review): n.type is passed to _generate_decl, which reads
            # funcspec/storage from it - confirm that is intended here.
            return self._generate_decl(n.type)
        elif typ == c_ast.IdentifierType:
            return ' '.join(n.names) + ' '
        elif typ in (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl):
            # Remember the modifier and keep descending towards the TypeDecl
            return self._generate_type(n.type, modifiers + [n])
eli.benderskyd5ba3452011-02-18 21:32:47 +0200240
241
def translate_to_c(filename):
    """ Parse the C file `filename` (with use_cpp=True) and print the C
        code that CGenerator produces from the resulting AST.
    """
    tree = parse_file(filename, use_cpp=True)
    c_text = CGenerator().visit(tree)
    print(c_text)
eli.benderskyd5ba3452011-02-18 21:32:47 +0200246
247
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        # No file given on the command line: run the generator on a small
        # built-in sample, dump its AST and the regenerated C code, and
        # remind the user how the script is meant to be invoked.
        #
        demo_source = r'''
static unsigned int hash_func(const char* str, unsigned int table_size)
{
    unsigned int hash_value;
    unsigned int a = 127;
    a++;
    ++a;

    while (hash_value == 0) {
        hash_value = (a*hash_value + *str) % table_size;
        break;
    }

    return hash_value;
}

    '''
        demo_ast = c_parser.CParser().parse(demo_source)
        demo_ast.show()
        print(CGenerator().visit(demo_ast))

        print("Please provide a filename as argument")
    else:
        # Translate the file named by the first command-line argument
        translate_to_c(sys.argv[1])
eli.bendersky3ae9f7a2011-02-27 07:19:41 +0200276
277
# ZZZ: operator precedence in expressions - especially problematic in
# assignments... - where to parenthesize? maybe just in BinaryOp?
# Other precedence-important operators (such as cast) need parens as well

# ZZZ: turn self.indent_level += 2 ... -= 2 into a context manager!
283