blob: 64e7a2551fb1aa6d12adebf3e062851d4a60d702 [file] [log] [blame]
Georg Brandl0c77a822008-06-10 16:37:50 +00001"""
2 ast
3 ~~~
4
5 The `ast` module helps Python applications to process trees of the Python
6 abstract syntax grammar. The abstract syntax itself might change with
7 each Python release; this module helps to find out programmatically what
8 the current grammar looks like and allows modifications of it.
9
10 An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
11 a flag to the `compile()` builtin function or by using the `parse()`
12 function from this module. The result will be a tree of objects whose
13 classes all inherit from `ast.AST`.
14
15 A modified abstract syntax tree can be compiled into a Python code object
16 using the built-in `compile()` function.
17
18 Additionally various helper functions are provided that make working with
19 the trees simpler. The main intention of the helper functions and this
20 module in general is to provide an easy to use interface for libraries
21 that work tightly with the python syntax (template engines for example).
22
23
24 :copyright: Copyright 2008 by Armin Ronacher.
25 :license: Python License.
26"""
27from _ast import *
28
29
def parse(source, filename='<unknown>', mode='exec', *,
          type_comments=False, feature_version=-1):
    """
    Build an AST node from *source*.

    This is the same as compile(source, filename, mode, PyCF_ONLY_AST).
    When *type_comments* is true, PyCF_TYPE_COMMENTS is added to the flags so
    that type comments are returned where the syntax allows them.
    *feature_version* is forwarded unchanged to compile().
    """
    compile_flags = (PyCF_ONLY_AST | PyCF_TYPE_COMMENTS if type_comments
                     else PyCF_ONLY_AST)
    return compile(source, filename, mode, compile_flags,
                   feature_version=feature_version)
Georg Brandl0c77a822008-06-10 16:37:50 +000042
43
def literal_eval(node_or_string):
    """
    Safely evaluate an expression node or a string holding a Python
    expression.

    Only these literal structures are accepted: strings, bytes, numbers,
    tuples, lists, dicts, sets, booleans, and None.  Anything else raises
    ValueError('malformed node or string: ...').
    """
    tree = node_or_string
    if isinstance(tree, str):
        tree = parse(tree, mode='eval')
    if isinstance(tree, Expression):
        tree = tree.body

    def _number(node):
        # Exact type match on purpose: subclasses such as bool are rejected.
        if isinstance(node, Constant) and type(node.value) in (int, float, complex):
            return node.value
        raise ValueError('malformed node or string: ' + repr(node))

    def _signed_number(node):
        # A number optionally preceded by a single unary + or -.
        if not (isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub))):
            return _number(node)
        magnitude = _number(node.operand)
        return +magnitude if isinstance(node.op, UAdd) else -magnitude

    def _evaluate(node):
        if isinstance(node, Constant):
            return node.value
        if isinstance(node, Tuple):
            return tuple(map(_evaluate, node.elts))
        if isinstance(node, List):
            return list(map(_evaluate, node.elts))
        if isinstance(node, Set):
            return set(map(_evaluate, node.elts))
        if isinstance(node, Dict):
            converted_keys = map(_evaluate, node.keys)
            converted_values = map(_evaluate, node.values)
            return dict(zip(converted_keys, converted_values))
        if isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)):
            # Only "<real> +/- <complex>" is allowed, which is how complex
            # literals such as 1+2j are spelled.
            real_part = _signed_number(node.left)
            imag_part = _number(node.right)
            if isinstance(real_part, (int, float)) and isinstance(imag_part, complex):
                if isinstance(node.op, Add):
                    return real_part + imag_part
                return real_part - imag_part
        return _signed_number(node)

    return _evaluate(tree)
90
91
def dump(node, annotate_fields=True, include_attributes=False):
    """
    Return a formatted dump of the tree in *node*.  This is mainly useful for
    debugging purposes.

    By default every field is rendered as ``name=value``; this makes the
    output impossible to evaluate, so pass *annotate_fields* as False if
    evaluation is wanted.  Attributes such as line numbers and column offsets
    are only included when *include_attributes* is True; attributes that are
    not set on a node are skipped.

    Raises TypeError if *node* is not an AST instance.
    """
    def _format(node):
        if isinstance(node, AST):
            if annotate_fields:
                parts = ['%s=%s' % (name, _format(value))
                         for name, value in iter_fields(node)]
            else:
                parts = [_format(value) for name, value in iter_fields(node)]
            if include_attributes and node._attributes:
                for name in node._attributes:
                    # Hand-built nodes may lack location attributes; skip
                    # them instead of failing the whole dump.
                    try:
                        value = getattr(node, name)
                    except AttributeError:
                        continue
                    parts.append('%s=%s' % (name, _format(value)))
            # Joining one flat list avoids a stray separator when a node has
            # attributes but no fields.
            return '%s(%s)' % (node.__class__.__name__, ', '.join(parts))
        elif isinstance(node, list):
            return '[%s]' % ', '.join(_format(x) for x in node)
        return repr(node)
    if not isinstance(node, AST):
        raise TypeError('expected AST, got %r' % node.__class__.__name__)
    return _format(node)
120
121
def copy_location(new_node, old_node):
    """
    Copy the source location attributes (`lineno`, `col_offset`,
    `end_lineno`, and `end_col_offset`) from *old_node* onto *new_node*
    wherever both node types declare the attribute and *old_node* has it set.
    Returns *new_node*.
    """
    for name in ('lineno', 'col_offset', 'end_lineno', 'end_col_offset'):
        if name not in old_node._attributes:
            continue
        if name not in new_node._attributes:
            continue
        if hasattr(old_node, name):
            setattr(new_node, name, getattr(old_node, name))
    return new_node
132
133
def fix_missing_locations(node):
    """
    Fill in missing location attributes throughout the tree rooted at *node*.

    compile() expects lineno and col_offset on every node that supports them,
    which is tedious to provide for generated nodes.  This helper walks the
    tree recursively and, wherever a supported location attribute is not set,
    copies the value in effect at the parent node.  The root starts from
    line 1, column 0.  Returns *node*.
    """
    location_attrs = ('lineno', 'end_lineno', 'col_offset', 'end_col_offset')

    def _fill(current, inherited):
        in_effect = {}
        for attr in location_attrs:
            value = inherited[attr]
            if attr in current._attributes:
                if hasattr(current, attr):
                    # The node's own value takes over for its children.
                    value = getattr(current, attr)
                else:
                    setattr(current, attr, value)
            in_effect[attr] = value
        for child in iter_child_nodes(current):
            _fill(child, in_effect)

    _fill(node, {'lineno': 1, 'end_lineno': 1,
                 'col_offset': 0, 'end_col_offset': 0})
    return node
167
168
def increment_lineno(node, n=1):
    """
    Add *n* to the line number and end line number of every node in the tree
    starting at *node*, treating an unset value as 0.  This is useful to
    "move code" to a different location in a file.  Returns *node*.
    """
    for descendant in walk(node):
        for attr in ('lineno', 'end_lineno'):
            if attr in descendant._attributes:
                setattr(descendant, attr, getattr(descendant, attr, 0) + n)
    return node
181
182
def iter_fields(node):
    """
    Yield a ``(fieldname, value)`` pair for every name in ``node._fields``
    that is actually set on *node*; unset fields are skipped.
    """
    for name in node._fields:
        try:
            value = getattr(node, name)
        except AttributeError:
            continue
        yield name, value
193
194
def iter_child_nodes(node):
    """
    Yield every direct child node of *node*: each field value that is itself
    a node, plus each node found inside a field whose value is a list.
    """
    for _name, value in iter_fields(node):
        if isinstance(value, AST):
            yield value
            continue
        if isinstance(value, list):
            yield from (item for item in value if isinstance(item, AST))
207
208
def get_docstring(node, clean=True):
    """
    Return the docstring of *node*, or None when it has none.

    *node* must be a function, async function, class, or module node;
    any other node type raises TypeError.

    If *clean* is true the text is passed through inspect.cleandoc(), which
    expands tabs to spaces and removes the indentation common to the second
    and following lines.
    """
    docstring_owners = (AsyncFunctionDef, FunctionDef, ClassDef, Module)
    if not isinstance(node, docstring_owners):
        raise TypeError("%r can't have docstrings" % node.__class__.__name__)
    body = node.body
    if not body or not isinstance(body[0], Expr):
        return None
    first = body[0].value
    if isinstance(first, Str):
        text = first.s
    elif isinstance(first, Constant) and isinstance(first.value, str):
        text = first.value
    else:
        return None
    if not clean:
        return text
    import inspect
    return inspect.cleandoc(text)
Georg Brandl0c77a822008-06-10 16:37:50 +0000233
234
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000235def _splitlines_no_ff(source):
236 """Split a string into lines ignoring form feed and other chars.
237
238 This mimics how the Python parser splits source code.
239 """
240 idx = 0
241 lines = []
242 next_line = ''
243 while idx < len(source):
244 c = source[idx]
245 next_line += c
246 idx += 1
247 # Keep \r\n together
248 if c == '\r' and idx < len(source) and source[idx] == '\n':
249 next_line += '\n'
250 idx += 1
251 if c in '\r\n':
252 lines.append(next_line)
253 next_line = ''
254
255 if next_line:
256 lines.append(next_line)
257 return lines
258
259
260def _pad_whitespace(source):
261 """Replace all chars except '\f\t' in a line with spaces."""
262 result = ''
263 for c in source:
264 if c in '\f\t':
265 result += c
266 else:
267 result += ' '
268 return result
269
270
def get_source_segment(source, node, *, padded=False):
    """Get source code segment of the *source* that generated *node*.

    If some location information (`lineno`, `end_lineno`, `col_offset`,
    or `end_col_offset`) is missing -- either not set on *node* or set to
    None -- return None.

    Column offsets are applied to the UTF-8 encoded form of each line.

    If *padded* is `True`, the first line of a multi-line statement will
    be padded with spaces to match its original position.
    """
    try:
        location = (node.lineno, node.end_lineno,
                    node.col_offset, node.end_col_offset)
    except AttributeError:
        return None
    # An attribute that exists but holds None is just as unusable as a
    # missing one; honour the documented contract instead of raising.
    if any(value is None for value in location):
        return None

    lineno = location[0] - 1
    end_lineno = location[1] - 1
    col_offset = location[2]
    end_col_offset = location[3]

    lines = _splitlines_no_ff(source)
    first_line = lines[lineno].encode()
    if end_lineno == lineno:
        return first_line[col_offset:end_col_offset].decode()

    if padded:
        padding = _pad_whitespace(first_line[:col_offset].decode())
    else:
        padding = ''

    first = padding + first_line[col_offset:].decode()
    last = lines[end_lineno].encode()[:end_col_offset].decode()
    middle = lines[lineno + 1:end_lineno]
    return ''.join([first, *middle, last])
304
305
def walk(node):
    """
    Recursively yield *node* and all of its descendants, in no specified
    order.  This is useful if you only want to modify nodes in place and
    don't care about the context.
    """
    from collections import deque
    pending = deque((node,))
    while pending:
        current = pending.popleft()
        # Children are queued before the node is handed out.
        for child in iter_child_nodes(current):
            pending.append(child)
        yield current
318
319
class NodeVisitor(object):
    """
    Base class for walking an abstract syntax tree and calling a visitor
    function for every node found.  Whatever the visitor function returns is
    forwarded by the `visit` method.

    This class is meant to be subclassed, with the subclass adding visitor
    methods.

    By default the visitor function for a node is named ``'visit_'`` + the
    class name of the node, so a `Name` node is handled by `visit_Name`.
    This behavior can be changed by overriding the `visit` method.  When no
    visitor function exists for a node, the `generic_visit` visitor is used
    instead.

    Don't use `NodeVisitor` if you want to apply changes to nodes while
    traversing; use the special visitor `NodeTransformer`, which allows
    modifications.
    """

    def visit(self, node):
        """Visit a node."""
        handler_name = 'visit_' + node.__class__.__name__
        handler = getattr(self, handler_name, self.generic_visit)
        return handler(node)

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node."""
        for _name, value in iter_fields(node):
            if isinstance(value, AST):
                self.visit(value)
            elif isinstance(value, list):
                for item in value:
                    if isinstance(item, AST):
                        self.visit(item)
355
356
class NodeTransformer(NodeVisitor):
    """
    A :class:`NodeVisitor` subclass that walks the abstract syntax tree and
    allows modification of nodes.

    The `NodeTransformer` walks the AST and uses the return value of each
    visitor method to replace or remove the old node.  Returning ``None``
    removes the node from its location; any other return value replaces it.
    Returning the original node leaves it in place.

    Here is an example transformer that rewrites all occurrences of name
    lookups (``foo``) to ``data['foo']``::

       class RewriteName(NodeTransformer):

           def visit_Name(self, node):
               return copy_location(Subscript(
                   value=Name(id='data', ctx=Load()),
                   slice=Index(value=Str(s=node.id)),
                   ctx=node.ctx
               ), node)

    Keep in mind that if the node you're operating on has child nodes you
    must either transform the child nodes yourself or call the
    :meth:`generic_visit` method for the node first.

    For nodes that were part of a collection of statements (that applies to
    all statement nodes), the visitor may also return a list of nodes rather
    than just a single node.

    Usually you use the transformer like this::

       node = YourTransformer().visit(node)
    """

    def generic_visit(self, node):
        for field, old_value in iter_fields(node):
            if isinstance(old_value, list):
                kept = []
                for item in old_value:
                    if not isinstance(item, AST):
                        # Non-node list entries pass through untouched.
                        kept.append(item)
                        continue
                    result = self.visit(item)
                    if result is None:
                        # The visitor asked for this node to be dropped.
                        continue
                    if isinstance(result, AST):
                        kept.append(result)
                    else:
                        # A non-node result is spliced in as a sequence.
                        kept.extend(result)
                # Update in place so the node keeps the same list object.
                old_value[:] = kept
            elif isinstance(old_value, AST):
                replacement = self.visit(old_value)
                if replacement is None:
                    delattr(node, field)
                else:
                    setattr(node, field, replacement)
        return node
Serhiy Storchaka3f228112018-09-27 17:42:37 +0300414
415
# The following code is for backward compatibility.
# It will be removed in the future.
418
def _getter(self):
    # Read side of the legacy ``n``/``s`` aliases: forwards to ``value``.
    return self.value

def _setter(self, new_value):
    # Write side of the legacy ``n``/``s`` aliases: forwards to ``value``.
    self.value = new_value

# Expose ``value`` on Constant under the older attribute names.
Constant.n = property(_getter, _setter)
Constant.s = property(_getter, _setter)
427
class _ABC(type):
    # Metaclass that lets isinstance() recognise Constant nodes as instances
    # of the classes registered in _const_types, based on the type of the
    # node's ``value``.

    def __instancecheck__(cls, inst):
        if not isinstance(inst, Constant):
            return False
        if cls not in _const_types:
            # Not one of the registered classes: use the normal check.
            return type.__instancecheck__(cls, inst)
        try:
            value = inst.value
        except AttributeError:
            return False
        # Value types listed in _const_types_not are excluded even when they
        # also match the accepted types.
        if isinstance(value, _const_types_not.get(cls, ())):
            return False
        return isinstance(value, _const_types[cls])
444
def _new(cls, *args, **kwargs):
    # Classes registered in _const_types produce a plain Constant node;
    # any other class is allocated normally.
    if cls not in _const_types:
        return Constant.__new__(cls, *args, **kwargs)
    return Constant(*args, **kwargs)
449
class Num(Constant, metaclass=_ABC):
    # Backward-compatibility class; its single field is named ``n``.
    _fields = ('n',)
    __new__ = _new
453
class Str(Constant, metaclass=_ABC):
    # Backward-compatibility class; its single field is named ``s``.
    _fields = ('s',)
    __new__ = _new
457
class Bytes(Constant, metaclass=_ABC):
    # Backward-compatibility class; its single field is named ``s``.
    _fields = ('s',)
    __new__ = _new
461
class NameConstant(Constant, metaclass=_ABC):
    # Backward-compatibility class; keeps the fields inherited from Constant.
    __new__ = _new
464
class Ellipsis(Constant, metaclass=_ABC):
    # Backward-compatibility class with no fields of its own.
    _fields = ()

    def __new__(cls, *args, **kwargs):
        # Subclasses are allocated normally; Ellipsis itself produces a
        # Constant whose first argument is the ``...`` singleton.
        if cls is not Ellipsis:
            return Constant.__new__(cls, *args, **kwargs)
        return Constant(..., *args, **kwargs)
472
# For each backward-compatibility class, the Python types a Constant's value
# must have for the node to count as an instance of that class.
_const_types = {
    Num: (int, float, complex),
    Str: (str,),
    Bytes: (bytes,),
    NameConstant: (type(None), bool),
    Ellipsis: (type(...),),
}
# Value types that are excluded even though they match _const_types
# (bool is a subclass of int, so it is ruled out for Num).
_const_types_not = {
    Num: (bool,),
}