Eli Bendersky | a291586 | 2012-06-23 06:25:53 +0300 | [diff] [blame] | 1 | #----------------------------------------------------------------- |
| 2 | # pycparser: explore_ast.py |
| 3 | # |
| 4 | # This example demonstrates how to "explore" the AST created by |
| 5 | # pycparser to understand its structure. The AST is a n-nary tree |
| 6 | # of nodes, each node having several children, each with a name. |
| 7 | # Just read the code, and let the comments guide you. The lines |
| 8 | # beginning with #~ can be uncommented to print out useful |
| 9 | # information from the AST. |
| 10 | # It helps to have the pycparser/_c_ast.cfg file in front of you. |
| 11 | # |
Eli Bendersky | d69771e | 2015-05-10 08:19:38 -0700 | [diff] [blame] | 12 | # Copyright (C) 2008-2015, Eli Bendersky |
Eli Bendersky | a291586 | 2012-06-23 06:25:53 +0300 | [diff] [blame] | 13 | # License: BSD |
| 14 | #----------------------------------------------------------------- |
| 15 | from __future__ import print_function |
| 16 | import sys |
| 17 | |
| 18 | # This is not required if you've installed pycparser into |
| 19 | # your site-packages/ with setup.py |
| 20 | # |
| 21 | sys.path.extend(['.', '..']) |
| 22 | |
| 23 | from pycparser import c_parser, c_ast |
| 24 | |
| 25 | # This is some C source to parse. Note that pycparser must begin |
| 26 | # at the top level of the C file, i.e. with either declarations |
| 27 | # or function definitions (this is called "external declarations" |
| 28 | # in C grammar lingo) |
| 29 | # |
| 30 | # Also, a C parser must have all the types declared in order to |
| 31 | # build the correct AST. It doesn't matter what they're declared |
| 32 | # to, so I've inserted the dummy typedef in the code to let the |
| 33 | # parser know Hash and Node are types. You don't need to do it |
| 34 | # when parsing real, correct C code. |
| 35 | # |
| 36 | text = r""" |
| 37 | typedef int Node, Hash; |
| 38 | |
| 39 | void HashPrint(Hash* hash, void (*PrintFunc)(char*, char*)) |
| 40 | { |
| 41 | unsigned int i; |
| 42 | |
| 43 | if (hash == NULL || hash->heads == NULL) |
| 44 | return; |
| 45 | |
| 46 | for (i = 0; i < hash->table_size; ++i) |
| 47 | { |
| 48 | Node* temp = hash->heads[i]; |
| 49 | |
| 50 | while (temp != NULL) |
| 51 | { |
| 52 | PrintFunc(temp->entry->key, temp->entry->value); |
| 53 | temp = temp->next; |
| 54 | } |
| 55 | } |
| 56 | } |
| 57 | """ |
| 58 | |
| 59 | # Create the parser and ask to parse the text. parse() will throw |
| 60 | # a ParseError if there's an error in the code |
| 61 | # |
| 62 | parser = c_parser.CParser() |
| 63 | ast = parser.parse(text, filename='<none>') |
| 64 | |
| 65 | # Uncomment the following line to see the AST in a nice, human |
| 66 | # readable way. show() is the most useful tool in exploring ASTs |
| 67 | # created by pycparser. See the c_ast.py file for the options you |
| 68 | # can pass it. |
| 69 | # |
| 70 | #~ ast.show() |
| 71 | |
| 72 | # OK, we've seen that the top node is FileAST. This is always the |
| 73 | # top node of the AST. Its children are "external declarations", |
| 74 | # and are stored in a list called ext[] (see _c_ast.cfg for the |
| 75 | # names and types of Nodes and their children). |
| 76 | # As you see from the printout, our AST has two Typedef children |
| 77 | # and one FuncDef child. |
| 78 | # Let's explore FuncDef more closely. As I've mentioned, the list |
| 79 | # ext[] holds the children of FileAST. Since the function |
| 80 | # definition is the third child, it's ext[2]. Uncomment the |
| 81 | # following line to show it: |
| 82 | # |
| 83 | #~ ast.ext[2].show() |
| 84 | |
| 85 | # A FuncDef consists of a declaration, a list of parameter |
| 86 | # declarations (for K&R style function definitions), and a body. |
| 87 | # First, let's examine the declaration. |
| 88 | # |
| 89 | function_decl = ast.ext[2].decl |
| 90 | |
| 91 | # function_decl, like any other declaration, is a Decl. Its type child |
| 92 | # is a FuncDecl, which has a return type and arguments stored in a |
| 93 | # ParamList node |
| 94 | #~ function_decl.type.show() |
| 95 | #~ function_decl.type.args.show() |
| 96 | |
| 97 | # The following displays the name and type of each argument: |
| 98 | # |
| 99 | #~ for param_decl in function_decl.type.args.params: |
| 100 | #~ print('Arg name: %s' % param_decl.name) |
| 101 | #~ print('Type:') |
| 102 | #~ param_decl.type.show(offset=6) |
| 103 | |
| 104 | # The body is of FuncDef is a Compound, which is a placeholder for a block |
| 105 | # surrounded by {} (You should be reading _c_ast.cfg parallel to this |
Eli Bendersky | 3ea82c2 | 2013-09-25 06:24:48 -0700 | [diff] [blame] | 106 | # explanation and seeing these things with your own eyes). |
Eli Bendersky | a291586 | 2012-06-23 06:25:53 +0300 | [diff] [blame] | 107 | # Let's see the block's declarations: |
| 108 | # |
| 109 | function_body = ast.ext[2].body |
| 110 | |
| 111 | # The following displays the declarations and statements in the function |
| 112 | # body |
| 113 | # |
| 114 | #~ for decl in function_body.block_items: |
| 115 | #~ decl.show() |
| 116 | |
| 117 | # We can see a single variable declaration, i, declared to be a simple type |
| 118 | # declaration of type 'unsigned int', followed by statements. |
Eli Bendersky | a291586 | 2012-06-23 06:25:53 +0300 | [diff] [blame] | 119 | |
| 120 | # block_items is a list, so the third element is the For statement: |
| 121 | # |
| 122 | for_stmt = function_body.block_items[2] |
| 123 | #~ for_stmt.show() |
| 124 | |
| 125 | # As you can see in _c_ast.cfg, For's children are 'init, cond, |
| 126 | # next' for the respective parts of the 'for' loop specifier, |
| 127 | # and stmt, which is either a single stmt or a Compound if there's |
| 128 | # a block. |
| 129 | # |
| 130 | # Let's dig deeper, to the while statement inside the for loop: |
| 131 | # |
| 132 | while_stmt = for_stmt.stmt.block_items[1] |
| 133 | #~ while_stmt.show() |
| 134 | |
| 135 | # While is simpler, it only has a condition node and a stmt node. |
| 136 | # The condition: |
| 137 | # |
| 138 | while_cond = while_stmt.cond |
| 139 | #~ while_cond.show() |
| 140 | |
| 141 | # Note that it's a BinaryOp node - the basic constituent of |
| 142 | # expressions in our AST. BinaryOp is the expression tree, with |
| 143 | # left and right nodes as children. It also has the op attribute, |
| 144 | # which is just the string representation of the operator. |
| 145 | # |
Chris Morrison | cbe6e3b | 2014-11-05 20:50:00 +1100 | [diff] [blame] | 146 | #~ print(while_cond.op) |
Eli Bendersky | a291586 | 2012-06-23 06:25:53 +0300 | [diff] [blame] | 147 | #~ while_cond.left.show() |
| 148 | #~ while_cond.right.show() |
| 149 | |
| 150 | # |
Eli Bendersky | 3ea82c2 | 2013-09-25 06:24:48 -0700 | [diff] [blame] | 151 | # That's it for the example. I hope you now see how easy it is to explore the |
| 152 | # AST created by pycparser. Although on the surface it is quite complex and has |
| 153 | # a lot of node types, this is the inherent complexity of the C language every |
| 154 | # parser/compiler designer has to cope with. |
| 155 | # Using the tools provided by the c_ast package it's easy to explore the |
| 156 | # structure of AST nodes and write code that processes them. |
| 157 | # Specifically, see the cdecl.py example for a non-trivial demonstration of what |
| 158 | # you can do by recursively going through the AST. |
Eli Bendersky | a291586 | 2012-06-23 06:25:53 +0300 | [diff] [blame] | 159 | # |
| 160 | |
| 161 | |