blob: 24df79f26cba10cda1b55d93adf586183659ab28 [file] [log] [blame]
Eli Bendersky3921e8e2010-05-21 09:05:39 +03001#-----------------------------------------------------------------
2# pycparser: explore_ast.py
3#
4# This example demonstrates how to "explore" the AST created by
5# pycparser to understand its structure. The AST is a n-nary tree
6# of nodes, each node having several children, each with a name.
7# Just read the code, and let the comments guide you. The lines
8# beginning with #~ can be uncommented to print out useful
9# information from the AST.
eli.bendersky073f29e2011-08-31 22:52:05 +030010# It helps to have the pycparser/_c_ast.cfg file in front of you.
Eli Bendersky3921e8e2010-05-21 09:05:39 +030011#
eli.bendersky84a6a632011-04-29 09:00:43 +030012# Copyright (C) 2008-2011, Eli Bendersky
13# License: BSD
Eli Bendersky3921e8e2010-05-21 09:05:39 +030014#-----------------------------------------------------------------
eli.bendersky073f29e2011-08-31 22:52:05 +030015from __future__ import print_function
Eli Bendersky3921e8e2010-05-21 09:05:39 +030016import sys
17
18# This is not required if you've installed pycparser into
19# your site-packages/ with setup.py
20#
Benf86dea12012-02-03 06:24:55 +020021sys.path.extend(['.', '..'])
Eli Bendersky3921e8e2010-05-21 09:05:39 +030022
23from pycparser import c_parser, c_ast
24
25# This is some C source to parse. Note that pycparser must begin
26# at the top level of the C file, i.e. with either declarations
27# or function definitions (this is called "external declarations"
28# in C grammar lingo)
29#
30# Also, a C parser must have all the types declared in order to
31# build the correct AST. It doesn't matter what they're declared
32# to, so I've inserted the dummy typedef in the code to let the
33# parser know Hash and Node are types. You don't need to do it
34# when parsing real, correct C code.
35#
36text = r"""
37 typedef int Node, Hash;
38
39 void HashPrint(Hash* hash, void (*PrintFunc)(char*, char*))
40 {
41 unsigned int i;
42
43 if (hash == NULL || hash->heads == NULL)
44 return;
45
46 for (i = 0; i < hash->table_size; ++i)
47 {
48 Node* temp = hash->heads[i];
49
50 while (temp != NULL)
51 {
52 PrintFunc(temp->entry->key, temp->entry->value);
53 temp = temp->next;
54 }
55 }
56 }
57"""
58
59# Create the parser and ask to parse the text. parse() will throw
60# a ParseError if there's an error in the code
61#
62parser = c_parser.CParser()
63ast = parser.parse(text, filename='<none>')
64
65# Uncomment the following line to see the AST in a nice, human
66# readable way. show() is the most useful tool in exploring ASTs
67# created by pycparser. See the c_ast.py file for the options you
68# can pass it.
69#
70#~ ast.show()
71
72# OK, we've seen that the top node is FileAST. This is always the
73# top node of the AST. Its children are "external declarations",
eli.bendersky073f29e2011-08-31 22:52:05 +030074# and are stored in a list called ext[] (see _c_ast.cfg for the
Eli Bendersky3921e8e2010-05-21 09:05:39 +030075# names and types of Nodes and their children).
76# As you see from the printout, our AST has two Typedef children
77# and one FuncDef child.
78# Let's explore FuncDef more closely. As I've mentioned, the list
79# ext[] holds the children of FileAST. Since the function
80# definition is the third child, it's ext[2]. Uncomment the
81# following line to show it:
82#
83#~ ast.ext[2].show()
84
85# A FuncDef consists of a declaration, a list of parameter
86# declarations (for K&R style function definitions), and a body.
eli.bendersky073f29e2011-08-31 22:52:05 +030087# First, let's examine the declaration.
88#
89function_decl = ast.ext[2].decl
90
91# function_decl, like any other declaration, is a Decl. Its type child
92# is a FuncDecl, which has a return type and arguments stored in a
93# ParamList node
94#~ function_decl.type.show()
95#~ function_decl.type.args.show()
96
97# The following displays the name and type of each argument:
98#
99#~ for param_decl in function_decl.type.args.params:
100 #~ print('Arg name: %s' % param_decl.name)
101 #~ print('Type:')
102 #~ param_decl.type.show(offset=6)
103
104# The body is of FuncDef is a Compound, which is a placeholder for a block
105# surrounded by {} (You should be reading _c_ast.cfg parallel to this
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300106# explanation and seeing these things by your own eyes).
107#
108# Let's see the block's declarations:
109#
110function_body = ast.ext[2].body
111
eli.benderskyef29ff92010-10-29 16:25:43 +0200112# The following displays the declarations and statements in the function
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300113# body
114#
eli.benderskyef29ff92010-10-29 16:25:43 +0200115#~ for decl in function_body.block_items:
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300116 #~ decl.show()
117
eli.benderskyef29ff92010-10-29 16:25:43 +0200118# We can see a single variable declaration, i, declared to be a simple type
119# declaration of type 'unsigned int', followed by statements.
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300120#
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300121
eli.benderskyef29ff92010-10-29 16:25:43 +0200122# block_items is a list, so the third element is the For statement:
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300123#
eli.benderskyef29ff92010-10-29 16:25:43 +0200124for_stmt = function_body.block_items[2]
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300125#~ for_stmt.show()
126
eli.bendersky073f29e2011-08-31 22:52:05 +0300127# As you can see in _c_ast.cfg, For's children are 'init, cond,
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300128# next' for the respective parts of the 'for' loop specifier,
129# and stmt, which is either a single stmt or a Compound if there's
130# a block.
131#
132# Let's dig deeper, to the while statement inside the for loop:
133#
eli.benderskyef29ff92010-10-29 16:25:43 +0200134while_stmt = for_stmt.stmt.block_items[1]
Eli Bendersky3921e8e2010-05-21 09:05:39 +0300135#~ while_stmt.show()
136
137# While is simpler, it only has a condition node and a stmt node.
138# The condition:
139#
140while_cond = while_stmt.cond
141#~ while_cond.show()
142
143# Note that it's a BinaryOp node - the basic constituent of
144# expressions in our AST. BinaryOp is the expression tree, with
145# left and right nodes as children. It also has the op attribute,
146# which is just the string representation of the operator.
147#
148#~ print while_cond.op
149#~ while_cond.left.show()
150#~ while_cond.right.show()
151
152#
153# That's if for the example. I hope you now see how easy it is to
154# explore the AST created by pycparser. Although on the surface it
155# is quite complex and has a lot of node types, this is the
156# inherent complexity of the C language every parser/compiler
157# designer has to cope with.
158# Using the tools provided by the c_ast package it's easy to
159# explore the structure of AST nodes and write code that processes
160# them.
161# Specifically, see the cdecl.py example for a non-trivial
162# demonstration of what you can do by recursively going through
163# the AST.
164#
165
166