blob: e0f6af09099a8753cf09ec4287ea0c23e9f9d322 [file] [log] [blame]
eli.benderskyd5ba3452011-02-18 21:32:47 +02001#-----------------------------------------------------------------
2# pycparser: c-to-c.py
3#
eli.benderskyafcfaac2011-02-25 16:46:01 +02004# Example of a C code generator from pycparser AST nodes, serving
5# as a simplistic translator from C to AST and back to C.
eli.benderskyb36f5bb2011-03-06 07:22:33 +02006# Note: at this stage, the example is "alpha release" and considered
7# experimental. Please file any bugs you find in the Issues page on pycparser's
8# website.
eli.benderskyd5ba3452011-02-18 21:32:47 +02009#
10# Copyright (C) 2008-2011, Eli Bendersky
eli.bendersky84a6a632011-04-29 09:00:43 +030011# License: BSD
eli.benderskyd5ba3452011-02-18 21:32:47 +020012#-----------------------------------------------------------------
13from __future__ import print_function
14import sys
15
16# This is not required if you've installed pycparser into
17# your site-packages/ with setup.py
18#
19sys.path.insert(0, '..')
20
21from pycparser import c_parser, c_ast, parse_file
22
23
class CGenerator(object):
    """ Generates C source text from an AST made of c_ast nodes.

        Uses the same visitor pattern as c_ast.NodeVisitor, but modified to
        return a value from each visit method, using string accumulation in
        generic_visit. Every visit_* method returns the C text for its node
        as a string.
    """
    def __init__(self):
        self.output = ''

        # Statements start with indentation of self.indent_level spaces,
        # using the _make_indent method
        #
        self.indent_level = 0

    def _make_indent(self):
        """ Returns the whitespace prefix for the current indent level.
        """
        return ' ' * self.indent_level

    def visit(self, node):
        """ Dispatches to visit_<ClassName> for the node, falling back to
            generic_visit when no such method exists (this includes None).
        """
        method = 'visit_' + node.__class__.__name__
        return getattr(self, method, self.generic_visit)(node)

    def generic_visit(self, node):
        """ Returns '' for None, otherwise the concatenation of the text
            generated for each of the node's children.
        """
        if node is None:
            return ''
        return ''.join(self.visit(c) for c in node.children())

    def visit_Constant(self, n):
        return n.value

    def visit_ID(self, n):
        return n.name

    def visit_ArrayRef(self, n):
        arrref = self._parenthesize_unless_simple(n.name)
        return arrref + '[' + self.visit(n.subscript) + ']'

    def visit_StructRef(self, n):
        # n.type is the access operator text, placed between object and field
        sref = self._parenthesize_unless_simple(n.name)
        return sref + n.type + self.visit(n.field)

    def visit_FuncCall(self, n):
        fref = self._parenthesize_unless_simple(n.name)
        return fref + '(' + self.visit(n.args) + ')'

    def visit_UnaryOp(self, n):
        operand = self._parenthesize_unless_simple(n.expr)
        if n.op == 'p++':
            return '%s++' % operand
        elif n.op == 'p--':
            return '%s--' % operand
        elif n.op == 'sizeof':
            # Always parenthesize the argument of sizeof since it can be
            # a name.
            return 'sizeof(%s)' % self.visit(n.expr)
        else:
            return '%s%s' % (n.op, operand)

    def visit_BinaryOp(self, n):
        # Both operands are parenthesized unless they are simple nodes, so
        # the generated text never relies on operator precedence.
        lval_str = self._parenthesize_unless_simple(n.left)
        rval_str = self._parenthesize_unless_simple(n.right)
        return '%s %s %s' % (lval_str, n.op, rval_str)

    def visit_Assignment(self, n):
        # Only a nested assignment on the right-hand side gets parentheses
        rval_str = self._parenthesize_if(
            n.rvalue,
            lambda node: isinstance(node, c_ast.Assignment))
        return '%s %s %s' % (self.visit(n.lvalue), n.op, rval_str)

    def visit_IdentifierType(self, n):
        return ' '.join(n.names)

    def visit_Decl(self, n, no_type=False):
        """ Generates a declaration, including its bit-field width and
            initializer when present.

            no_type is used when a Decl is part of a DeclList, where the
            type is explicitly only for the first declaration in a list;
            in that case only the declared name is emitted.
        """
        s = n.name if no_type else self._generate_decl(n)
        if n.bitsize:
            s += ' : ' + self.visit(n.bitsize)
        if n.init:
            if isinstance(n.init, c_ast.ExprList):
                # An expression list used as initializer is a brace list
                s += ' = {' + self.visit(n.init) + '}'
            else:
                s += ' = ' + self.visit(n.init)
        return s

    def visit_DeclList(self, n):
        # The first declaration carries the type; the rest only their names
        s = self.visit(n.decls[0])
        if len(n.decls) > 1:
            s += ', ' + ', '.join(self.visit_Decl(decl, no_type=True)
                                  for decl in n.decls[1:])
        return s

    def visit_Typedef(self, n):
        s = ''
        if n.storage:
            s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def visit_Cast(self, n):
        # A cast binds tighter than binary and conditional operators, so a
        # non-simple operand must be parenthesized: '(int) (a + b)' rather
        # than '(int) a + b', which would cast only 'a'.
        s = '(' + self._generate_type(n.to_type) + ')'
        return s + ' ' + self._parenthesize_unless_simple(n.expr)

    def visit_ExprList(self, n):
        # Nested expression lists are emitted as brace-enclosed sub-lists
        visited_subexprs = []
        for expr in n.exprs:
            if isinstance(expr, c_ast.ExprList):
                visited_subexprs.append('{' + self.visit(expr) + '}')
            else:
                visited_subexprs.append(self.visit(expr))
        return ', '.join(visited_subexprs)

    def visit_Enum(self, n):
        s = 'enum'
        if n.name:
            s += ' ' + n.name
        if n.values:
            items = []
            for enumerator in n.values.enumerators:
                item = enumerator.name
                if enumerator.value:
                    item += ' = ' + self.visit(enumerator.value)
                items.append(item)
            s += ' {' + ', '.join(items) + '}'
        return s

    def visit_FuncDef(self, n):
        decl = self.visit(n.decl)
        # The indentation level is reset before the body is generated
        self.indent_level = 0
        # The body is a Compound node
        body = self.visit(n.body)
        return decl + '\n' + body + '\n'

    def visit_FileAST(self, n):
        # Function definitions end with their own body; every other
        # top-level entity is terminated with a semicolon.
        s = ''
        for ext in n.ext:
            if isinstance(ext, c_ast.FuncDef):
                s += self.visit(ext)
            else:
                s += self.visit(ext) + ';\n'
        return s

    def visit_Compound(self, n):
        s = self._make_indent() + '{\n'
        self.indent_level += 2
        if n.block_items:
            s += ''.join(self._generate_stmt(stmt) for stmt in n.block_items)
        self.indent_level -= 2
        s += self._make_indent() + '}\n'
        return s

    def visit_EmptyStatement(self, n):
        return ';'

    def visit_ParamList(self, n):
        return ', '.join(self.visit(param) for param in n.params)

    def visit_Return(self, n):
        s = 'return'
        if n.expr:
            s += ' ' + self.visit(n.expr)
        return s + ';'

    def visit_Break(self, n):
        return 'break;'

    def visit_Continue(self, n):
        return 'continue;'

    def visit_TernaryOp(self, n):
        # Each operand is parenthesized. The conditional operator has low
        # precedence and groups right-to-left, so an operand that is itself
        # a conditional (or an assignment) would otherwise be regrouped when
        # the generated text is parsed again, e.g. '(a ? b : c) ? d : e'
        # must not become 'a ? b : c ? d : e'.
        return '(%s) ? (%s) : (%s)' % (
            self.visit(n.cond), self.visit(n.iftrue), self.visit(n.iffalse))

    def visit_If(self, n):
        s = 'if ('
        if n.cond:
            s += self.visit(n.cond)
        s += ')\n'
        s += self._generate_stmt(n.iftrue, add_indent=True)
        if n.iffalse:
            s += self._make_indent() + 'else\n'
            s += self._generate_stmt(n.iffalse, add_indent=True)
        return s

    def visit_For(self, n):
        s = 'for ('
        if n.init:
            s += self.visit(n.init)
        s += ';'
        if n.cond:
            s += ' ' + self.visit(n.cond)
        s += ';'
        if n.next:
            s += ' ' + self.visit(n.next)
        s += ')\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_While(self, n):
        s = 'while ('
        if n.cond:
            s += self.visit(n.cond)
        s += ')\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_DoWhile(self, n):
        s = 'do\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        s += self._make_indent() + 'while ('
        if n.cond:
            s += self.visit(n.cond)
        s += ');'
        return s

    def visit_Switch(self, n):
        s = 'switch (' + self.visit(n.cond) + ')\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_Case(self, n):
        s = 'case ' + self.visit(n.expr) + ':\n'
        s += self._generate_stmt(n.stmt, add_indent=True)
        return s

    def visit_Default(self, n):
        return 'default:\n' + self._generate_stmt(n.stmt, add_indent=True)

    def visit_Label(self, n):
        return n.name + ':\n' + self._generate_stmt(n.stmt)

    def visit_Goto(self, n):
        return 'goto ' + n.name + ';'

    def visit_EllipsisParam(self, n):
        return '...'

    def visit_Struct(self, n):
        return self._generate_struct_union(n, 'struct')

    def visit_Typename(self, n):
        return self._generate_type(n.type)

    def visit_Union(self, n):
        return self._generate_struct_union(n, 'union')

    def visit_NamedInitializer(self, n):
        """ Generates a designated initializer: '<designators> = <value>'.

            An ID designator is emitted as a '.name' member designator; any
            other designator expression is emitted as an '[expr]' index
            designator. A value that is an expression list is emitted in
            braces, matching visit_Decl and visit_ExprList.
        """
        s = ''
        for name in n.name:
            if isinstance(name, c_ast.ID):
                s += '.' + name.name
            else:
                # Constants and arbitrary index expressions alike
                s += '[' + self.visit(name) + ']'
        value = self.visit(n.expr)
        if isinstance(n.expr, c_ast.ExprList):
            value = '{' + value + '}'
        return s + ' = ' + value

    def _generate_struct_union(self, n, name):
        """ Generates code for structs and unions. name should be either
            'struct' or 'union'.
        """
        s = name + ' ' + (n.name or '')
        if n.decls:
            s += '\n'
            s += self._make_indent()
            self.indent_level += 2
            s += '{\n'
            for decl in n.decls:
                s += self._generate_stmt(decl)
            self.indent_level -= 2
            s += self._make_indent() + '}'
        return s

    def _generate_stmt(self, n, add_indent=False):
        """ Generation from a statement node. This method exists as a wrapper
            for individual visit_* methods to handle different treatment of
            some statements in this context.

            With add_indent, the statement's own line is indented two extra
            spaces; self.indent_level itself is restored before visiting n.
        """
        typ = type(n)
        if add_indent:
            self.indent_level += 2
        indent = self._make_indent()
        if add_indent:
            self.indent_level -= 2

        if typ in (
                c_ast.Decl, c_ast.Assignment, c_ast.Cast, c_ast.UnaryOp,
                c_ast.BinaryOp, c_ast.TernaryOp, c_ast.FuncCall,
                c_ast.ArrayRef, c_ast.StructRef):
            # These can also appear in an expression context so no semicolon
            # is added to them automatically
            #
            return indent + self.visit(n) + ';\n'
        elif typ in (c_ast.Compound,):
            # No extra indentation required before the opening brace of a
            # compound - because it consists of multiple lines it has to
            # compute its own indentation.
            #
            return self.visit(n)
        else:
            return indent + self.visit(n) + '\n'

    def _generate_decl(self, n):
        """ Generation from a Decl node: function specifiers, then storage
            specifiers, then the declared type (which includes the name).
        """
        s = ''
        if n.funcspec:
            s = ' '.join(n.funcspec) + ' '
        if n.storage:
            s += ' '.join(n.storage) + ' '
        s += self._generate_type(n.type)
        return s

    def _generate_type(self, n, modifiers=None):
        """ Recursive generation from a type node. n is the type node.
            modifiers collects the PtrDecl, ArrayDecl and FuncDecl modifiers
            encountered on the way down to a TypeDecl, to allow proper
            generation from it. When omitted, no modifiers are assumed.
        """
        if modifiers is None:
            modifiers = []
        typ = type(n)

        if typ == c_ast.TypeDecl:
            s = ''
            if n.quals:
                s += ' '.join(n.quals) + ' '
            s += self.visit(n.type)

            nstr = n.declname if n.declname else ''
            # Resolve modifiers.
            # Wrap in parens to distinguish pointer to array and pointer to
            # function syntax.
            #
            for i, modifier in enumerate(modifiers):
                follows_ptr = (i != 0 and
                               isinstance(modifiers[i - 1], c_ast.PtrDecl))
                if isinstance(modifier, c_ast.ArrayDecl):
                    if follows_ptr:
                        nstr = '(' + nstr + ')'
                    nstr += '[' + self.visit(modifier.dim) + ']'
                elif isinstance(modifier, c_ast.FuncDecl):
                    if follows_ptr:
                        nstr = '(' + nstr + ')'
                    nstr += '(' + self.visit(modifier.args) + ')'
                elif isinstance(modifier, c_ast.PtrDecl):
                    nstr = '*' + nstr
            if nstr:
                s += ' ' + nstr
            return s
        elif typ == c_ast.Decl:
            return self._generate_decl(n.type)
        elif typ == c_ast.Typename:
            return self._generate_type(n.type)
        elif typ == c_ast.IdentifierType:
            return ' '.join(n.names) + ' '
        elif typ in (c_ast.ArrayDecl, c_ast.PtrDecl, c_ast.FuncDecl):
            # A new list is built on each step; the caller's list is never
            # mutated.
            return self._generate_type(n.type, modifiers + [n])
        else:
            return self.visit(n)

    def _parenthesize_if(self, n, condition):
        """ Visits 'n' and returns its string representation, parenthesized
            if the condition function applied to the node returns True.
        """
        s = self.visit(n)
        if condition(n):
            return '(' + s + ')'
        else:
            return s

    def _parenthesize_unless_simple(self, n):
        """ Common use case for _parenthesize_if
        """
        return self._parenthesize_if(n, lambda d: not self._is_simple_node(d))

    def _is_simple_node(self, n):
        """ Returns True for nodes that are "simple" - i.e. nodes that always
            have higher precedence than operators.
        """
        return isinstance(n, (c_ast.Constant, c_ast.ID, c_ast.ArrayRef,
                              c_ast.StructRef, c_ast.FuncCall))
392
eli.benderskyd5ba3452011-02-18 21:32:47 +0200393
def translate_to_c(filename):
    """ Parses the C file at 'filename' (running it through the C
        preprocessor first) and prints the C code regenerated from its AST.
    """
    tree = parse_file(filename, use_cpp=True)
    print(CGenerator().visit(tree))
eli.benderskyd5ba3452011-02-18 21:32:47 +0200398
399
def zz_test_translate():
    """ Internal use: parses a small hard-coded C snippet, dumps the
        resulting AST and prints the C code regenerated from it.
    """
    src = r'''

    void f(void){}

int main(void)
{
    ;
    return 0;
}
'''
    tree = c_parser.CParser().parse(src)
    tree.show()

    print(CGenerator().visit(tree))

    # tracing the generator for debugging
    #~ import trace
    #~ tr = trace.Trace(countcallers=1)
    #~ tr.runfunc(CGenerator().visit, tree)
    #~ tr.results().write_results()
eli.benderskyb36f5bb2011-03-06 07:22:33 +0200424
425
426#------------------------------------------------------------------------------
if __name__ == "__main__":
    # The internal self-test (zz_test_translate) is deliberately not run
    # here: it prints an unrelated AST dump and C snippet to stdout, which
    # would end up in front of the translation of the requested file. Call
    # it by hand when debugging the generator.
    if len(sys.argv) > 1:
        translate_to_c(sys.argv[1])
    else:
        print("Please provide a filename as argument")
eli.benderskyd5ba3452011-02-18 21:32:47 +0200432 print("Please provide a filename as argument")
eli.bendersky3ae9f7a2011-02-27 07:19:41 +0200433