Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 1 | #-----------------------------------------------------------------
|
| 2 | # pycparser: explore_ast.py
|
| 3 | #
|
| 4 | # This example demonstrates how to "explore" the AST created by
|
| 5 | # pycparser to understand its structure. The AST is a n-nary tree
|
| 6 | # of nodes, each node having several children, each with a name.
|
| 7 | # Just read the code, and let the comments guide you. The lines
|
| 8 | # beginning with #~ can be uncommented to print out useful
|
| 9 | # information from the AST.
|
eli.bendersky | 073f29e | 2011-08-31 22:52:05 +0300 | [diff] [blame] | 10 | # It helps to have the pycparser/_c_ast.cfg file in front of you.
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 11 | #
|
eli.bendersky | 84a6a63 | 2011-04-29 09:00:43 +0300 | [diff] [blame] | 12 | # Copyright (C) 2008-2011, Eli Bendersky
|
| 13 | # License: BSD
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 14 | #-----------------------------------------------------------------
|
eli.bendersky | 073f29e | 2011-08-31 22:52:05 +0300 | [diff] [blame] | 15 | from __future__ import print_function
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 16 | import sys
|
| 17 |
|
| 18 | # This is not required if you've installed pycparser into
|
| 19 | # your site-packages/ with setup.py
|
| 20 | #
|
Ben | f86dea1 | 2012-02-03 06:24:55 +0200 | [diff] [blame] | 21 | sys.path.extend(['.', '..'])
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 22 |
|
| 23 | from pycparser import c_parser, c_ast
|
| 24 |
|
| 25 | # This is some C source to parse. Note that pycparser must begin
|
| 26 | # at the top level of the C file, i.e. with either declarations
|
| 27 | # or function definitions (this is called "external declarations"
|
| 28 | # in C grammar lingo)
|
| 29 | #
|
| 30 | # Also, a C parser must have all the types declared in order to
|
| 31 | # build the correct AST. It doesn't matter what they're declared
|
| 32 | # to, so I've inserted the dummy typedef in the code to let the
|
| 33 | # parser know Hash and Node are types. You don't need to do it
|
| 34 | # when parsing real, correct C code.
|
| 35 | #
|
| 36 | text = r"""
|
| 37 | typedef int Node, Hash;
|
| 38 |
|
| 39 | void HashPrint(Hash* hash, void (*PrintFunc)(char*, char*))
|
| 40 | {
|
| 41 | unsigned int i;
|
| 42 |
|
| 43 | if (hash == NULL || hash->heads == NULL)
|
| 44 | return;
|
| 45 |
|
| 46 | for (i = 0; i < hash->table_size; ++i)
|
| 47 | {
|
| 48 | Node* temp = hash->heads[i];
|
| 49 |
|
| 50 | while (temp != NULL)
|
| 51 | {
|
| 52 | PrintFunc(temp->entry->key, temp->entry->value);
|
| 53 | temp = temp->next;
|
| 54 | }
|
| 55 | }
|
| 56 | }
|
| 57 | """
|
| 58 |
|
| 59 | # Create the parser and ask to parse the text. parse() will throw
|
| 60 | # a ParseError if there's an error in the code
|
| 61 | #
|
| 62 | parser = c_parser.CParser()
|
| 63 | ast = parser.parse(text, filename='<none>')
|
| 64 |
|
| 65 | # Uncomment the following line to see the AST in a nice, human
|
| 66 | # readable way. show() is the most useful tool in exploring ASTs
|
| 67 | # created by pycparser. See the c_ast.py file for the options you
|
| 68 | # can pass it.
|
| 69 | #
|
| 70 | #~ ast.show()
|
| 71 |
|
| 72 | # OK, we've seen that the top node is FileAST. This is always the
|
| 73 | # top node of the AST. Its children are "external declarations",
|
eli.bendersky | 073f29e | 2011-08-31 22:52:05 +0300 | [diff] [blame] | 74 | # and are stored in a list called ext[] (see _c_ast.cfg for the
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 75 | # names and types of Nodes and their children).
|
| 76 | # As you see from the printout, our AST has two Typedef children
|
| 77 | # and one FuncDef child.
|
| 78 | # Let's explore FuncDef more closely. As I've mentioned, the list
|
| 79 | # ext[] holds the children of FileAST. Since the function
|
| 80 | # definition is the third child, it's ext[2]. Uncomment the
|
| 81 | # following line to show it:
|
| 82 | #
|
| 83 | #~ ast.ext[2].show()
|
| 84 |
|
| 85 | # A FuncDef consists of a declaration, a list of parameter
|
| 86 | # declarations (for K&R style function definitions), and a body.
|
eli.bendersky | 073f29e | 2011-08-31 22:52:05 +0300 | [diff] [blame] | 87 | # First, let's examine the declaration.
|
| 88 | #
|
| 89 | function_decl = ast.ext[2].decl
|
| 90 |
|
| 91 | # function_decl, like any other declaration, is a Decl. Its type child
|
| 92 | # is a FuncDecl, which has a return type and arguments stored in a
|
| 93 | # ParamList node
|
| 94 | #~ function_decl.type.show()
|
| 95 | #~ function_decl.type.args.show()
|
| 96 |
|
| 97 | # The following displays the name and type of each argument:
|
| 98 | #
|
| 99 | #~ for param_decl in function_decl.type.args.params:
|
| 100 | #~ print('Arg name: %s' % param_decl.name)
|
| 101 | #~ print('Type:')
|
| 102 | #~ param_decl.type.show(offset=6)
|
| 103 |
|
| 104 | # The body is of FuncDef is a Compound, which is a placeholder for a block
|
| 105 | # surrounded by {} (You should be reading _c_ast.cfg parallel to this
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 106 | # explanation and seeing these things by your own eyes).
|
| 107 | #
|
| 108 | # Let's see the block's declarations:
|
| 109 | #
|
| 110 | function_body = ast.ext[2].body
|
| 111 |
|
eli.bendersky | ef29ff9 | 2010-10-29 16:25:43 +0200 | [diff] [blame] | 112 | # The following displays the declarations and statements in the function
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 113 | # body
|
| 114 | #
|
eli.bendersky | ef29ff9 | 2010-10-29 16:25:43 +0200 | [diff] [blame] | 115 | #~ for decl in function_body.block_items:
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 116 | #~ decl.show()
|
| 117 |
|
eli.bendersky | ef29ff9 | 2010-10-29 16:25:43 +0200 | [diff] [blame] | 118 | # We can see a single variable declaration, i, declared to be a simple type
|
| 119 | # declaration of type 'unsigned int', followed by statements.
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 120 | #
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 121 |
|
eli.bendersky | ef29ff9 | 2010-10-29 16:25:43 +0200 | [diff] [blame] | 122 | # block_items is a list, so the third element is the For statement:
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 123 | #
|
eli.bendersky | ef29ff9 | 2010-10-29 16:25:43 +0200 | [diff] [blame] | 124 | for_stmt = function_body.block_items[2]
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 125 | #~ for_stmt.show()
|
| 126 |
|
eli.bendersky | 073f29e | 2011-08-31 22:52:05 +0300 | [diff] [blame] | 127 | # As you can see in _c_ast.cfg, For's children are 'init, cond,
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 128 | # next' for the respective parts of the 'for' loop specifier,
|
| 129 | # and stmt, which is either a single stmt or a Compound if there's
|
| 130 | # a block.
|
| 131 | #
|
| 132 | # Let's dig deeper, to the while statement inside the for loop:
|
| 133 | #
|
eli.bendersky | ef29ff9 | 2010-10-29 16:25:43 +0200 | [diff] [blame] | 134 | while_stmt = for_stmt.stmt.block_items[1]
|
Eli Bendersky | 3921e8e | 2010-05-21 09:05:39 +0300 | [diff] [blame] | 135 | #~ while_stmt.show()
|
| 136 |
|
| 137 | # While is simpler, it only has a condition node and a stmt node.
|
| 138 | # The condition:
|
| 139 | #
|
| 140 | while_cond = while_stmt.cond
|
| 141 | #~ while_cond.show()
|
| 142 |
|
| 143 | # Note that it's a BinaryOp node - the basic constituent of
|
| 144 | # expressions in our AST. BinaryOp is the expression tree, with
|
| 145 | # left and right nodes as children. It also has the op attribute,
|
| 146 | # which is just the string representation of the operator.
|
| 147 | #
|
| 148 | #~ print while_cond.op
|
| 149 | #~ while_cond.left.show()
|
| 150 | #~ while_cond.right.show()
|
| 151 |
|
| 152 | #
|
| 153 | # That's if for the example. I hope you now see how easy it is to
|
| 154 | # explore the AST created by pycparser. Although on the surface it
|
| 155 | # is quite complex and has a lot of node types, this is the
|
| 156 | # inherent complexity of the C language every parser/compiler
|
| 157 | # designer has to cope with.
|
| 158 | # Using the tools provided by the c_ast package it's easy to
|
| 159 | # explore the structure of AST nodes and write code that processes
|
| 160 | # them.
|
| 161 | # Specifically, see the cdecl.py example for a non-trivial
|
| 162 | # demonstration of what you can do by recursively going through
|
| 163 | # the AST.
|
| 164 | #
|
| 165 |
|
| 166 |
|