blob: 2b33360c78a5ce5345dff907eda3add61c788af3 [file] [log] [blame]
Michael White18c28442017-02-02 20:02:20 -08001#------------------------------------------------------------------------------
2# pycparser: c_json.py
3#
4# by Michael White (@mypalmike)
5#
6# This example includes functions to serialize and deserialize an ast
7# to and from json format. Serializing involves walking the ast and converting
8# each node from a python Node object into a python dict. Deserializing
9# involves the opposite conversion, walking the tree formed by the
10# dict and converting each dict into the specific Node object it represents.
11# The dict itself is serialized and deserialized using the python json module.
12#
13# The dict representation is a fairly direct transformation of the object
14# attributes. Each node in the dict gets one metadata field referring to the
15# specific node class name, _nodetype. Each local attribute (i.e. not linking
16# to child nodes) has a string value or array of string values. Each child
17# attribute is either another dict or an array of dicts, exactly as in the
18# Node object representation. The "coord" attribute, representing the
19# node's location within the source code, is serialized/deserialized from
20# a Coord object into a string of the format "filename:line[:column]".
21#
22# Example TypeDecl node, with IdentifierType child node, represented as a dict:
23# "type": {
24# "_nodetype": "TypeDecl",
25# "coord": "c_files/funky.c:8",
26# "declname": "o",
27# "quals": [],
28# "type": {
29# "_nodetype": "IdentifierType",
30# "coord": "c_files/funky.c:8",
31# "names": [
32# "char"
33# ]
34# }
35# }
36#------------------------------------------------------------------------------
37from __future__ import print_function
38
39import json
40import sys
41import re
42
43# This is not required if you've installed pycparser into
44# your site-packages/ with setup.py
45#
46sys.path.extend(['.', '..'])
47
48from pycparser import parse_file, c_ast
49from pycparser.plyparser import Coord
50
51
# Matches child names of the array form 'name[index]' (e.g. 'block_items[1]').
# Group 1 is the text before the brackets, group 2 the text inside them.
RE_CHILD_ARRAY = re.compile(r'(.*)\[(.*)\]')
# Matches names that begin with a '__...__' pattern; child_attrs_of uses it
# to leave such entries of a class's __slots__ out of the attribute set.
RE_INTERNAL_ATTR = re.compile('__.*__')
54
55
class CJsonError(Exception):
    """ Error raised by this module, e.g. by to_dict when the indices of an
    array-valued child do not arrive in consecutive order.
    """
58
59
def memodict(fn):
    """ Memoize a one-argument function using a dict subclass.

    The returned callable is the bound __getitem__ of a dict whose
    __missing__ hook calls fn(key) and stores the result, so fn runs at
    most once per distinct (hashable) argument.
    """
    class _MemoTable(dict):
        def __missing__(self, key):
            # Only reached on a cache miss: compute, remember, hand back.
            computed = fn(key)
            self[key] = computed
            return computed

    table = _MemoTable()
    return table.__getitem__
67
68
@memodict
def child_attrs_of(klass):
    """
    Return the set of attribute names on a Node class that hold child nodes.

    These are the entries of klass.__slots__ that neither match
    RE_INTERNAL_ATTR nor appear in klass.attr_names. The result is
    memoized per class to avoid repeating the same string matching.

    """
    local_attrs = set(klass.attr_names)
    visible_slots = {
        slot for slot in klass.__slots__
        if RE_INTERNAL_ATTR.match(slot) is None
    }
    return visible_slots - local_attrs
79
80
def to_dict(node):
    """ Recursively convert an ast node into its dict representation.

    The resulting dict holds, in insertion order: the '_nodetype' class
    name, every attribute listed in the class's attr_names, the 'coord'
    string (or None), each child converted recursively, and finally None
    for any child attribute the node did not yield.

    Raises CJsonError if the indices of an array-valued child are not
    seen in consecutive order starting from 0.
    """
    node_class = node.__class__

    # Metadata comes first, followed by the node's local attributes.
    as_dict = {'_nodetype': node_class.__name__}
    for attr_name in node_class.attr_names:
        as_dict[attr_name] = getattr(node, attr_name)

    # The Coord object is stored in its string form.
    as_dict['coord'] = str(node.coord) if node.coord else None

    for name, child_node in node.children():
        # Child names are either simple (e.g. 'value') or indexed
        # (e.g. 'block_items[1]').
        indexed = RE_CHILD_ARRAY.match(name)
        if not indexed:
            as_dict[name] = to_dict(child_node)
            continue

        list_name, index_text = indexed.groups()
        position = int(index_text)
        # Array entries are expected in order: verify, then append.
        siblings = as_dict.setdefault(list_name, [])
        if position != len(siblings):
            raise CJsonError('Internal ast error. Array {} out of order. '
                             'Expected index {}, got {}'.format(
                                 list_name, len(siblings), position))
        siblings.append(to_dict(child_node))

    # Child attributes that never showed up still need a None entry.
    for absent in child_attrs_of(node_class):
        as_dict.setdefault(absent, None)

    return as_dict
123
124
def to_json(node, **kwargs):
    """ Serialize an ast node to a json string.

    Keyword arguments are forwarded unchanged to json.dumps.
    """
    node_as_dict = to_dict(node)
    return json.dumps(node_as_dict, **kwargs)
128
129
def file_to_dict(filename):
    """ Parse a C file (with use_cpp=True) and return its ast as a dict """
    return to_dict(parse_file(filename, use_cpp=True))
134
135
def file_to_json(filename, **kwargs):
    """ Parse a C file (with use_cpp=True) and return its ast as a json
    string. Keyword arguments are passed on to to_json.
    """
    return to_json(parse_file(filename, use_cpp=True), **kwargs)
140
141
142def _parse_coord(coord_str):
143 """ Parse coord string (file:line[:column]) into Coord object. """
144 if coord_str is None:
145 return None
146
147 vals = coord_str.split(':')
148 vals.extend([None] * 3)
149 filename, line, column = vals[:3]
150 return Coord(filename, line, column)
151
152
153def _convert_to_obj(value):
154 """
155 Convert an object in the dict representation into an object.
156 Note: Mutually recursive with from_dict.
157
158 """
159 value_type = type(value)
160 if value_type == dict:
161 return from_dict(value)
162 elif value_type == list:
163 return [_convert_to_obj(item) for item in value]
164 else:
165 # String
166 return value
167
168
def from_dict(node_dict):
    """ Recursively build an ast from dict representation.

    node_dict must contain a '_nodetype' key naming a class in c_ast; all
    other keys are passed to that class's constructor as keyword
    arguments, with 'coord' parsed by _parse_coord and every other value
    converted by _convert_to_obj.

    The input dict is only read, never modified, so the same dict can be
    deserialized more than once or serialized again afterwards.

    Raises KeyError if '_nodetype' is missing and AttributeError if c_ast
    has no attribute with that name.
    """
    # NOTE(review): class_name comes straight from the input, so any
    # attribute of c_ast can be looked up here; validate it first if the
    # dict may come from an untrusted source.
    class_name = node_dict['_nodetype']
    klass = getattr(c_ast, class_name)

    # Create a new dict containing the key-value pairs which we can pass
    # to node constructors.
    objs = {}
    for key, value in node_dict.items():
        if key == '_nodetype':
            # Metadata only; not a constructor argument.
            continue
        if key == 'coord':
            objs[key] = _parse_coord(value)
        else:
            objs[key] = _convert_to_obj(value)

    # Use keyword parameters, which works thanks to beautifully consistent
    # ast Node initializers.
    return klass(**objs)
187
188
def from_json(ast_json):
    """ Parse a json string and build the ast it describes """
    decoded = json.loads(ast_json)
    return from_dict(decoded)
192
193
194#------------------------------------------------------------------------------
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        print("Please provide a filename as argument")
    else:
        # Some test code...
        # Round trip C -> ast -> dict -> ast -> json, then print the json.
        rebuilt_ast = from_dict(file_to_dict(sys.argv[1]))
        print(to_json(rebuilt_ast, sort_keys=True, indent=4))
203 print("Please provide a filename as argument")