blob: 70fbbdd2ffbd8a230b07b3534940cd77d58e0ff2 [file] [log] [blame]
Georg Brandl0c77a822008-06-10 16:37:50 +00001"""
2 ast
3 ~~~
4
5 The `ast` module helps Python applications to process trees of the Python
6 abstract syntax grammar. The abstract syntax itself might change with
7 each Python release; this module helps to find out programmatically what
8 the current grammar looks like and allows modifications of it.
9
10 An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
11 a flag to the `compile()` builtin function or by using the `parse()`
12 function from this module. The result will be a tree of objects whose
13 classes all inherit from `ast.AST`.
14
15 A modified abstract syntax tree can be compiled into a Python code object
16 using the built-in `compile()` function.
17
18 Additionally various helper functions are provided that make working with
19 the trees simpler. The main intention of the helper functions and this
20 module in general is to provide an easy to use interface for libraries
21 that work tightly with the python syntax (template engines for example).
22
23
24 :copyright: Copyright 2008 by Armin Ronacher.
25 :license: Python License.
26"""
27from _ast import *
28
29
def parse(source, filename='<unknown>', mode='exec', *,
          type_comments=False, feature_version=None):
    """
    Parse the source into an AST node.
    Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
    Pass type_comments=True to get back type comments where the syntax allows.

    *feature_version* may be None (passed to compile() as -1), a
    ``(major, minor)`` tuple whose major component must be 3, or an int
    giving the minor version for 3.x.

    Raises ValueError if a tuple *feature_version* has a major component
    other than 3.
    """
    flags = PyCF_ONLY_AST
    if type_comments:
        flags |= PyCF_TYPE_COMMENTS
    if isinstance(feature_version, tuple):
        major, minor = feature_version  # Should be a 2-tuple.
        # Validate explicitly: an ``assert`` is stripped under -O and would
        # then let an unsupported major version through unnoticed.
        if major != 3:
            raise ValueError('Unsupported major version: %r' % (major,))
        feature_version = minor
    elif feature_version is None:
        feature_version = -1
    # Else it should be an int giving the minor version for 3.x.
    return compile(source, filename, mode, flags,
                   feature_version=feature_version)
Georg Brandl0c77a822008-06-10 16:37:50 +000049
50
def literal_eval(node_or_string):
    """
    Safely evaluate an expression node or a string containing a Python
    expression.  The string or node provided may only consist of the following
    Python literal structures: strings, bytes, numbers, tuples, lists, dicts,
    sets, booleans, and None.

    Leading spaces and tabs of a string argument are ignored.  Raises
    ValueError when a node is not one of the supported literal forms.
    """
    if isinstance(node_or_string, str):
        # Strip leading blanks so an indented literal is not rejected by the
        # parser as an unexpected indent.
        node_or_string = parse(node_or_string.lstrip(" \t"), mode='eval')
    if isinstance(node_or_string, Expression):
        node_or_string = node_or_string.body

    def _raise_malformed_node(node):
        raise ValueError('malformed node or string: ' + repr(node))

    def _convert_num(node):
        # Exact type match on purpose: a bool constant is not accepted here.
        if isinstance(node, Constant):
            if type(node.value) in (int, float, complex):
                return node.value
        _raise_malformed_node(node)

    def _convert_signed_num(node):
        if isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub)):
            operand = _convert_num(node.operand)
            if isinstance(node.op, UAdd):
                return + operand
            else:
                return - operand
        return _convert_num(node)

    def _convert(node):
        if isinstance(node, Constant):
            return node.value
        elif isinstance(node, Tuple):
            return tuple(map(_convert, node.elts))
        elif isinstance(node, List):
            return list(map(_convert, node.elts))
        elif isinstance(node, Set):
            return set(map(_convert, node.elts))
        elif isinstance(node, Dict):
            # zip() would silently drop the surplus of a hand-built node whose
            # keys and values differ in length; reject it instead.
            if len(node.keys) != len(node.values):
                _raise_malformed_node(node)
            return dict(zip(map(_convert, node.keys),
                            map(_convert, node.values)))
        elif isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)):
            # Only "<real> +/- <complex>" is accepted, i.e. complex literals.
            left = _convert_signed_num(node.left)
            right = _convert_num(node.right)
            if isinstance(left, (int, float)) and isinstance(right, complex):
                if isinstance(node.op, Add):
                    return left + right
                else:
                    return left - right
        return _convert_signed_num(node)

    return _convert(node_or_string)
97
98
def dump(node, annotate_fields=True, include_attributes=False):
    """
    Return a formatted dump of the tree in *node*.  This is mainly useful for
    debugging purposes.  The returned string will show the names and the values
    for fields.  This makes the code impossible to evaluate, so if evaluation is
    wanted *annotate_fields* must be set to False.  Attributes such as line
    numbers and column offsets are not dumped by default.  If this is wanted,
    *include_attributes* can be set to True; attributes that are not set on a
    node are left out.

    Raises TypeError if *node* is not an AST instance.
    """
    def _format(node):
        if isinstance(node, AST):
            parts = []
            for name, value in iter_fields(node):
                text = _format(value)
                parts.append('%s=%s' % (name, text) if annotate_fields else text)
            if include_attributes and node._attributes:
                for name in node._attributes:
                    try:
                        value = getattr(node, name)
                    except AttributeError:
                        # Attribute declared for this node type but never
                        # assigned: skip it rather than fail the whole dump.
                        continue
                    parts.append('%s=%s' % (name, _format(value)))
            # A single join keeps the separators right even when the node
            # has attributes but no fields.
            return '%s(%s)' % (node.__class__.__name__, ', '.join(parts))
        elif isinstance(node, list):
            return '[%s]' % ', '.join(_format(x) for x in node)
        return repr(node)

    if not isinstance(node, AST):
        raise TypeError('expected AST, got %r' % node.__class__.__name__)
    return _format(node)
127
128
def copy_location(new_node, old_node):
    """
    Transfer the source position (`lineno`, `col_offset`, `end_lineno` and
    `end_col_offset`) from *old_node* onto *new_node* and return *new_node*.

    An attribute is copied only when both node types declare it in
    ``_attributes`` and *old_node* actually has a value for it.
    """
    for name in ('lineno', 'col_offset', 'end_lineno', 'end_col_offset'):
        if name not in old_node._attributes:
            continue
        if name not in new_node._attributes:
            continue
        if not hasattr(old_node, name):
            continue
        setattr(new_node, name, getattr(old_node, name))
    return new_node
139
140
def fix_missing_locations(node):
    """
    When you compile a node tree with compile(), the compiler expects lineno and
    col_offset attributes for every node that supports them.  This is rather
    tedious to fill in for generated nodes, so this helper adds these attributes
    (and end_lineno / end_col_offset) recursively where not already set, by
    setting them to the values of the parent node.  It works recursively
    starting at *node*, using line 1, column 0 as the initial position, and
    returns *node*.
    """
    def _fix(node, lineno, col_offset, end_lineno, end_col_offset):
        if 'lineno' in node._attributes:
            if not hasattr(node, 'lineno'):
                node.lineno = lineno
            else:
                lineno = node.lineno
        if 'end_lineno' in node._attributes:
            # An end position of None means "unknown"; treat it like a missing
            # attribute so it is filled in instead of being handed down.
            if getattr(node, 'end_lineno', None) is None:
                node.end_lineno = end_lineno
            else:
                end_lineno = node.end_lineno
        if 'col_offset' in node._attributes:
            if not hasattr(node, 'col_offset'):
                node.col_offset = col_offset
            else:
                col_offset = node.col_offset
        if 'end_col_offset' in node._attributes:
            if getattr(node, 'end_col_offset', None) is None:
                node.end_col_offset = end_col_offset
            else:
                end_col_offset = node.end_col_offset
        for child in iter_child_nodes(node):
            _fix(child, lineno, col_offset, end_lineno, end_col_offset)

    _fix(node, 1, 0, 1, 0)
    return node
174
175
def increment_lineno(node, n=1):
    """
    Increment the line number and end line number of each node in the tree
    starting at *node* by *n*.  This is useful to "move code" to a different
    location in a file.

    A node without a ``lineno``/``end_lineno`` value is treated as being at
    line 0; an ``end_lineno`` of None is left as None.  Returns *node*.
    """
    for child in walk(node):
        if 'lineno' in child._attributes:
            child.lineno = getattr(child, 'lineno', 0) + n
        if 'end_lineno' in child._attributes:
            end_lineno = getattr(child, 'end_lineno', 0)
            # None marks an unknown end line; adding to it would raise
            # TypeError, so keep it untouched.
            if end_lineno is not None:
                child.end_lineno = end_lineno + n
    return node
188
189
def iter_fields(node):
    """
    Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields``
    that is present on *node*.
    """
    for field in node._fields:
        # Guard only the attribute lookup.  Keeping the ``yield`` outside the
        # ``try`` means an AttributeError thrown into the generator by its
        # consumer is not mistaken for a missing field and swallowed.
        try:
            value = getattr(node, field)
        except AttributeError:
            continue
        yield field, value
200
201
def iter_child_nodes(node):
    """
    Yield every direct child of *node*: each field value that is itself a
    node, plus each node found inside a field whose value is a list.
    """
    for _name, value in iter_fields(node):
        if isinstance(value, AST):
            yield value
            continue
        if not isinstance(value, list):
            continue
        for element in value:
            if isinstance(element, AST):
                yield element
214
215
def get_docstring(node, clean=True):
    """
    Return the docstring for the given node or None if no docstring can
    be found.  If the node provided does not have docstrings a TypeError
    will be raised.

    If *clean* is `True`, the text is passed through `inspect.cleandoc`,
    which expands tabs to spaces and removes any whitespace that can be
    uniformly removed from the second line onwards.
    """
    if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)):
        raise TypeError("%r can't have docstrings" % node.__class__.__name__)
    if not (node.body and isinstance(node.body[0], Expr)):
        return None
    first = node.body[0].value
    # A docstring is a string Constant.  The deprecated ``Str`` alias matches
    # exactly the same nodes, so no separate check for it is needed.
    if not (isinstance(first, Constant) and isinstance(first.value, str)):
        return None
    text = first.value
    if clean:
        import inspect
        text = inspect.cleandoc(text)
    return text
Georg Brandl0c77a822008-06-10 16:37:50 +0000240
241
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000242def _splitlines_no_ff(source):
243 """Split a string into lines ignoring form feed and other chars.
244
245 This mimics how the Python parser splits source code.
246 """
247 idx = 0
248 lines = []
249 next_line = ''
250 while idx < len(source):
251 c = source[idx]
252 next_line += c
253 idx += 1
254 # Keep \r\n together
255 if c == '\r' and idx < len(source) and source[idx] == '\n':
256 next_line += '\n'
257 idx += 1
258 if c in '\r\n':
259 lines.append(next_line)
260 next_line = ''
261
262 if next_line:
263 lines.append(next_line)
264 return lines
265
266
267def _pad_whitespace(source):
268 """Replace all chars except '\f\t' in a line with spaces."""
269 result = ''
270 for c in source:
271 if c in '\f\t':
272 result += c
273 else:
274 result += ' '
275 return result
276
277
def get_source_segment(source, node, *, padded=False):
    """Get source code segment of the *source* that generated *node*.

    If some location information (`lineno`, `end_lineno`, `col_offset`,
    or `end_col_offset`) is missing, return None.  An end position of
    None counts as missing.

    If *padded* is `True`, the first line of a multi-line statement will
    be padded with spaces to match its original position.
    """
    try:
        # A None end position would otherwise fail in the arithmetic below
        # with TypeError instead of giving the documented None result.
        if node.end_lineno is None or node.end_col_offset is None:
            return None
        lineno = node.lineno - 1
        end_lineno = node.end_lineno - 1
        col_offset = node.col_offset
        end_col_offset = node.end_col_offset
    except AttributeError:
        return None

    lines = _splitlines_no_ff(source)
    # The offsets are applied to the encoded line, so each slice is taken on
    # bytes and decoded afterwards.
    if end_lineno == lineno:
        return lines[lineno].encode()[col_offset:end_col_offset].decode()

    first_line = lines[lineno].encode()
    if padded:
        padding = _pad_whitespace(first_line[:col_offset].decode())
    else:
        padding = ''

    first = padding + first_line[col_offset:].decode()
    last = lines[end_lineno].encode()[:end_col_offset].decode()
    middle = lines[lineno + 1:end_lineno]
    return ''.join([first, *middle, last])
311
312
def walk(node):
    """
    Generate *node* and then every node below it in the tree, in no
    specified order.  Handy when nodes only need to be modified in place and
    the surrounding context does not matter.
    """
    from collections import deque
    pending = deque((node,))
    while pending:
        current = pending.popleft()
        # Children are queued before the node is handed to the caller.
        pending.extend(iter_child_nodes(current))
        yield current
325
326
class NodeVisitor(object):
    """
    Base class for walking an abstract syntax tree and calling a visitor
    method for each node encountered.  Whatever the visitor method returns
    is passed back by `visit`.

    Subclass it and add visitor methods.  By default the method used for a
    node is ``'visit_'`` followed by the node's class name, so a `Name` node
    is handled by `visit_Name`.  Override `visit` to change that lookup.
    When no matching method exists, `generic_visit` is called instead.

    Do not use `NodeVisitor` to change nodes while traversing; use
    `NodeTransformer`, which supports modification.
    """

    def visit(self, node):
        """Dispatch *node* to its visitor method and return the result."""
        handler_name = 'visit_' + node.__class__.__name__
        handler = getattr(self, handler_name, self.generic_visit)
        return handler(node)

    def generic_visit(self, node):
        """Visit the child nodes of *node*; used when no visitor method exists."""
        for _name, value in iter_fields(node):
            # Treat a single value like a one-element sequence so both
            # shapes share the same loop.
            children = value if isinstance(value, list) else (value,)
            for child in children:
                if isinstance(child, AST):
                    self.visit(child)
362
363
class NodeTransformer(NodeVisitor):
    """
    A :class:`NodeVisitor` subclass that walks the abstract syntax tree and
    lets visitor methods modify it.

    The return value of each visitor method decides what happens to the
    visited node: ``None`` removes the node from its location, any other value
    replaces it.  Returning the original node leaves the tree unchanged.

    An example transformer that rewrites every name lookup (``foo``) to
    ``data['foo']``::

       class RewriteName(NodeTransformer):

           def visit_Name(self, node):
               return copy_location(Subscript(
                   value=Name(id='data', ctx=Load()),
                   slice=Index(value=Constant(value=node.id)),
                   ctx=node.ctx
               ), node)

    If the node being handled has child nodes, either transform them yourself
    or call :meth:`generic_visit` on the node first.

    For nodes that sit inside a list (which applies to all statement nodes),
    a visitor may return a list of nodes instead of a single node.

    Typical usage::

       node = YourTransformer().visit(node)
    """

    def generic_visit(self, node):
        """Visit the children of *node*, applying replacements, and return *node*."""
        for field, old_value in iter_fields(node):
            if isinstance(old_value, list):
                kept = []
                for item in old_value:
                    if not isinstance(item, AST):
                        # Non-node list entries are carried over unchanged.
                        kept.append(item)
                        continue
                    result = self.visit(item)
                    if result is None:
                        continue
                    if isinstance(result, AST):
                        kept.append(result)
                    else:
                        # The visitor returned several nodes: splice them in.
                        kept.extend(result)
                # Update in place so the node keeps the same list object.
                old_value[:] = kept
            elif isinstance(old_value, AST):
                replacement = self.visit(old_value)
                if replacement is None:
                    delattr(node, field)
                else:
                    setattr(node, field, replacement)
        return node
Serhiy Storchaka3f228112018-09-27 17:42:37 +0300421
422
# The following code is for backward compatibility.
# It will be removed in the future.
425
def _getter(self):
    """Read accessor behind the legacy ``n`` and ``s`` aliases of ``value``."""
    return self.value


def _setter(self, value):
    """Write accessor behind the legacy ``n`` and ``s`` aliases of ``value``."""
    self.value = value


# Expose ``value`` on Constant under the attribute names ``n`` and ``s``.
Constant.n = property(fget=_getter, fset=_setter)
Constant.s = property(fget=_getter, fset=_setter)
434
435class _ABC(type):
436
437 def __instancecheck__(cls, inst):
438 if not isinstance(inst, Constant):
439 return False
440 if cls in _const_types:
441 try:
442 value = inst.value
443 except AttributeError:
444 return False
445 else:
Anthony Sottile74176222019-01-18 11:30:28 -0800446 return (
447 isinstance(value, _const_types[cls]) and
448 not isinstance(value, _const_types_not.get(cls, ()))
449 )
Serhiy Storchaka3f228112018-09-27 17:42:37 +0300450 return type.__instancecheck__(cls, inst)
451
def _new(cls, *args, **kwargs):
    """Shared ``__new__`` of the legacy constant classes.

    For a class listed in ``_const_types`` a plain Constant is built from the
    arguments; any other class goes through ``Constant.__new__``.
    """
    if cls not in _const_types:
        return Constant.__new__(cls, *args, **kwargs)
    return Constant(*args, **kwargs)
456
class Num(Constant, metaclass=_ABC):
    """Backward-compatibility class for numeric constants (field ``n``)."""

    __new__ = _new
    _fields = ('n',)
460
class Str(Constant, metaclass=_ABC):
    """Backward-compatibility class for string constants (field ``s``)."""

    __new__ = _new
    _fields = ('s',)
464
class Bytes(Constant, metaclass=_ABC):
    """Backward-compatibility class for bytes constants (field ``s``)."""

    __new__ = _new
    _fields = ('s',)
468
class NameConstant(Constant, metaclass=_ABC):
    """Backward-compatibility class for the None, True and False constants."""

    __new__ = _new
471
class Ellipsis(Constant, metaclass=_ABC):
    """Backward-compatibility class for the ``...`` constant; it has no fields."""

    _fields = ()

    def __new__(cls, *args, **kwargs):
        # Subclasses go through Constant.__new__; Ellipsis itself becomes a
        # Constant whose first argument is ``...``.
        if cls is not Ellipsis:
            return Constant.__new__(cls, *args, **kwargs)
        return Constant(..., *args, **kwargs)
479
# Value types accepted for each legacy class; consulted by _ABC and _new.
_const_types = {
    Num: (int, float, complex),
    Str: (str,),
    Bytes: (bytes,),
    NameConstant: (type(None), bool),
    Ellipsis: (type(...),),
}

# Value types excluded even though they match the table above:
# bool is a subclass of int, but a bool constant is not a Num.
_const_types_not = {
    Num: (bool,),
}