blob: 470a74b3b5ff794c373dd6c2a09277325b358b20 [file] [log] [blame]
"""
    ast
    ~~~

    The `ast` module helps Python applications to process trees of the Python
    abstract syntax grammar.  The abstract syntax itself might change with
    each Python release; this module helps to find out programmatically what
    the current grammar looks like and allows modifications of it.

    An abstract syntax tree can be generated by passing `ast.PyCF_ONLY_AST` as
    a flag to the `compile()` builtin function or by using the `parse()`
    function from this module.  The result will be a tree of objects whose
    classes all inherit from `ast.AST`.

    A modified abstract syntax tree can be compiled into a Python code object
    using the built-in `compile()` function.

    Additionally, various helper functions are provided that make working with
    the trees simpler.  The main intention of the helper functions and this
    module in general is to provide an easy-to-use interface for libraries
    that work tightly with the Python syntax (template engines for example).


    :copyright: Copyright 2008 by Armin Ronacher.
    :license: Python License.
"""
27from _ast import *
28
29
def parse(source, filename='<unknown>', mode='exec', *, type_comments=False):
    """
    Parse the source into an AST node.

    Equivalent to compile(source, filename, mode, PyCF_ONLY_AST).
    Pass type_comments=True to get back type comments where the syntax allows.

    *source*, *filename* and *mode* are forwarded unchanged to the built-in
    compile(); any exception it raises propagates to the caller.
    """
    flags = PyCF_ONLY_AST
    if type_comments:
        # Extra compiler flag that keeps type comments on the resulting nodes.
        flags |= PyCF_TYPE_COMMENTS
    return compile(source, filename, mode, flags)
Georg Brandl0c77a822008-06-10 16:37:50 +000040
41
def literal_eval(node_or_string):
    """
    Safely evaluate an expression node or a string containing a Python
    expression.  The string or node provided may only consist of the following
    Python literal structures: strings, bytes, numbers, tuples, lists, dicts,
    sets, booleans, and None.

    Leading spaces and tabs in a string argument are ignored.  Anything that
    is not one of the literal forms above raises ValueError; a string that
    cannot be parsed raises whatever parse() raises.
    """
    if isinstance(node_or_string, str):
        # Strip leading blanks: in 'eval' mode they would otherwise be
        # rejected by the parser as an unexpected indent.
        node_or_string = parse(node_or_string.lstrip(" \t"), mode='eval')
    if isinstance(node_or_string, Expression):
        node_or_string = node_or_string.body

    def _convert_num(node):
        # Exact type check on purpose: bool is an int subclass and must not
        # be accepted as a number here.
        if isinstance(node, Constant):
            if type(node.value) in (int, float, complex):
                return node.value
        raise ValueError('malformed node or string: ' + repr(node))

    def _convert_signed_num(node):
        # A number optionally preceded by a single unary + or -.
        if isinstance(node, UnaryOp) and isinstance(node.op, (UAdd, USub)):
            operand = _convert_num(node.operand)
            if isinstance(node.op, UAdd):
                return + operand
            else:
                return - operand
        return _convert_num(node)

    def _convert(node):
        if isinstance(node, Constant):
            return node.value
        elif isinstance(node, Tuple):
            return tuple(map(_convert, node.elts))
        elif isinstance(node, List):
            return list(map(_convert, node.elts))
        elif isinstance(node, Set):
            return set(map(_convert, node.elts))
        elif isinstance(node, Dict):
            return dict(zip(map(_convert, node.keys),
                            map(_convert, node.values)))
        elif isinstance(node, BinOp) and isinstance(node.op, (Add, Sub)):
            # Only "real +/- imaginary" is allowed, so complex literals such
            # as 1+2j work while general arithmetic stays rejected.
            left = _convert_signed_num(node.left)
            right = _convert_num(node.right)
            if isinstance(left, (int, float)) and isinstance(right, complex):
                if isinstance(node.op, Add):
                    return left + right
                else:
                    return left - right
        # Either a (signed) number or something unsupported, in which case
        # the helper raises ValueError.
        return _convert_signed_num(node)

    return _convert(node_or_string)
88
89
def dump(node, annotate_fields=True, include_attributes=False):
    """
    Return a formatted dump of the tree in *node*.  This is mainly useful for
    debugging purposes.  The returned string will show the names and the values
    for fields.  This makes the code impossible to evaluate, so if evaluation is
    wanted *annotate_fields* must be set to False.  Attributes such as line
    numbers and column offsets are not dumped by default.  If this is wanted,
    *include_attributes* can be set to True; attributes that are not set on a
    node are left out of the output.

    Raises TypeError if *node* is not an AST instance.
    """
    def _format(node):
        if isinstance(node, AST):
            args = []
            for name, value in iter_fields(node):
                text = _format(value)
                args.append('%s=%s' % (name, text) if annotate_fields else text)
            if include_attributes and node._attributes:
                for name in node._attributes:
                    try:
                        value = getattr(node, name)
                    except AttributeError:
                        # Attribute declared for this node type but never set.
                        continue
                    args.append('%s=%s' % (name, _format(value)))
            return '%s(%s)' % (node.__class__.__name__, ', '.join(args))
        elif isinstance(node, list):
            return '[%s]' % ', '.join(_format(x) for x in node)
        return repr(node)

    if not isinstance(node, AST):
        raise TypeError('expected AST, got %r' % node.__class__.__name__)
    return _format(node)
118
119
def copy_location(new_node, old_node):
    """
    Copy source location (`lineno`, `col_offset`, `end_lineno`, and
    `end_col_offset` attributes) from *old_node* to *new_node* if possible,
    and return *new_node*.

    An attribute is copied only when both node types declare it in
    ``_attributes`` and it is actually set on *old_node*.
    """
    for attr in 'lineno', 'col_offset', 'end_lineno', 'end_col_offset':
        if (attr in old_node._attributes and attr in new_node._attributes
                and hasattr(old_node, attr)):
            setattr(new_node, attr, getattr(old_node, attr))
    return new_node
130
131
def fix_missing_locations(node):
    """
    When you compile a node tree with compile(), the compiler expects lineno and
    col_offset attributes for every node that supports them.  This is rather
    tedious to fill in for generated nodes, so this helper adds these attributes
    recursively where not already set, by setting them to the values of the
    parent node.  It works recursively starting at *node*.

    The root falls back to line 1, column 0 for both start and end positions.
    An end position whose value is None is treated as not set.  Returns *node*.
    """
    def _fix(node, lineno, col_offset, end_lineno, end_col_offset):
        # For each position: fill it in from the inherited value when missing,
        # otherwise make the node's own value the one its children inherit.
        if 'lineno' in node._attributes:
            if not hasattr(node, 'lineno'):
                node.lineno = lineno
            else:
                lineno = node.lineno
        if 'end_lineno' in node._attributes:
            # End positions can be present but None; hasattr() alone would
            # then leave them unfilled.
            if getattr(node, 'end_lineno', None) is None:
                node.end_lineno = end_lineno
            else:
                end_lineno = node.end_lineno
        if 'col_offset' in node._attributes:
            if not hasattr(node, 'col_offset'):
                node.col_offset = col_offset
            else:
                col_offset = node.col_offset
        if 'end_col_offset' in node._attributes:
            if getattr(node, 'end_col_offset', None) is None:
                node.end_col_offset = end_col_offset
            else:
                end_col_offset = node.end_col_offset
        for child in iter_child_nodes(node):
            _fix(child, lineno, col_offset, end_lineno, end_col_offset)

    _fix(node, 1, 0, 1, 0)
    return node
165
166
def increment_lineno(node, n=1):
    """
    Increment the line number and end line number of each node in the tree
    starting at *node* by *n*.  This is useful to "move code" to a different
    location in a file.

    A missing line number counts as 0 before the increment.  An end line
    number that is None is left untouched.  Returns *node*.
    """
    for child in walk(node):
        if 'lineno' in child._attributes:
            child.lineno = getattr(child, 'lineno', 0) + n
        if 'end_lineno' in child._attributes:
            end_lineno = getattr(child, 'end_lineno', 0)
            # None means "no end position"; adding to it would raise.
            if end_lineno is not None:
                child.end_lineno = end_lineno + n
    return node
179
180
def iter_fields(node):
    """
    Yield a tuple of ``(fieldname, value)`` for each field in ``node._fields``
    that is present on *node*.

    Fields named in ``_fields`` but not set on the instance are skipped.
    """
    for field in node._fields:
        try:
            yield field, getattr(node, field)
        except AttributeError:
            # Declared but unset field: nothing to report for it.
            pass
191
192
def iter_child_nodes(node):
    """
    Yield all direct child nodes of *node*, that is, all fields that are nodes
    and all items of fields that are lists of nodes.

    Non-node field values, and non-node items inside list fields, are skipped.
    """
    for _name, field in iter_fields(node):
        if isinstance(field, AST):
            yield field
        elif isinstance(field, list):
            for item in field:
                if isinstance(item, AST):
                    yield item
205
206
def get_docstring(node, clean=True):
    """
    Return the docstring for the given node or None if no docstring can
    be found.  If the node provided does not have docstrings a TypeError
    will be raised.

    If *clean* is `True`, all tabs are expanded to spaces and any whitespace
    that can be uniformly removed from the second line onwards is removed.
    """
    if not isinstance(node, (AsyncFunctionDef, FunctionDef, ClassDef, Module)):
        raise TypeError("%r can't have docstrings" % node.__class__.__name__)
    if not (node.body and isinstance(node.body[0], Expr)):
        return None
    # A docstring is a string constant used as the first statement of the
    # body.  Checking Constant plus a str value also covers the legacy Str
    # alias, which only ever matches Constant nodes holding a str.
    first = node.body[0].value
    if not (isinstance(first, Constant) and isinstance(first.value, str)):
        return None
    text = first.value
    if clean:
        import inspect
        text = inspect.cleandoc(text)
    return text
Georg Brandl0c77a822008-06-10 16:37:50 +0000231
232
Ivan Levkivskyi9932a222019-01-22 11:18:22 +0000233def _splitlines_no_ff(source):
234 """Split a string into lines ignoring form feed and other chars.
235
236 This mimics how the Python parser splits source code.
237 """
238 idx = 0
239 lines = []
240 next_line = ''
241 while idx < len(source):
242 c = source[idx]
243 next_line += c
244 idx += 1
245 # Keep \r\n together
246 if c == '\r' and idx < len(source) and source[idx] == '\n':
247 next_line += '\n'
248 idx += 1
249 if c in '\r\n':
250 lines.append(next_line)
251 next_line = ''
252
253 if next_line:
254 lines.append(next_line)
255 return lines
256
257
258def _pad_whitespace(source):
259 """Replace all chars except '\f\t' in a line with spaces."""
260 result = ''
261 for c in source:
262 if c in '\f\t':
263 result += c
264 else:
265 result += ' '
266 return result
267
268
def get_source_segment(source, node, *, padded=False):
    """Get source code segment of the *source* that generated *node*.

    If some location information (`lineno`, `end_lineno`, `col_offset`,
    or `end_col_offset`) is missing, return None.  An end position that is
    set to None counts as missing.

    If *padded* is `True`, the first line of a multi-line statement will
    be padded with spaces to match its original position.
    """
    try:
        if node.end_lineno is None or node.end_col_offset is None:
            return None
        # Line numbers are 1-based on the node; convert to list indexes.
        lineno = node.lineno - 1
        end_lineno = node.end_lineno - 1
        col_offset = node.col_offset
        end_col_offset = node.end_col_offset
    except AttributeError:
        return None

    lines = _splitlines_no_ff(source)
    # Column offsets are applied to the encoded (UTF-8) form of each line,
    # hence the encode/slice/decode round trips below.
    if end_lineno == lineno:
        return lines[lineno].encode()[col_offset:end_col_offset].decode()

    if padded:
        padding = _pad_whitespace(lines[lineno].encode()[:col_offset].decode())
    else:
        padding = ''

    first = padding + lines[lineno].encode()[col_offset:].decode()
    last = lines[end_lineno].encode()[:end_col_offset].decode()
    middle = lines[lineno + 1:end_lineno]
    return ''.join([first, *middle, last])
302
303
def walk(node):
    """
    Recursively yield all descendant nodes in the tree starting at *node*
    (including *node* itself), in no specified order.  This is useful if you
    only want to modify nodes in place and don't care about the context.
    """
    from collections import deque
    todo = deque([node])
    while todo:
        node = todo.popleft()
        # Queue the children before handing the node to the caller.
        todo.extend(iter_child_nodes(node))
        yield node
316
317
class NodeVisitor(object):
    """
    A node visitor base class that walks the abstract syntax tree and calls a
    visitor function for every node found.  This function may return a value
    which is forwarded by the `visit` method.

    This class is meant to be subclassed, with the subclass adding visitor
    methods.

    Per default the visitor functions for the nodes are ``'visit_'`` +
    class name of the node.  So a `TryFinally` node visit function would
    be `visit_TryFinally`.  This behavior can be changed by overriding
    the `visit` method.  If no visitor function exists for a node
    (return value `None`) the `generic_visit` visitor is used instead.

    Don't use the `NodeVisitor` if you want to apply changes to nodes during
    traversing.  For this a special visitor exists (`NodeTransformer`) that
    allows modifications.
    """

    def visit(self, node):
        """Visit a node and return whatever its visitor method returns."""
        method = 'visit_' + node.__class__.__name__
        visitor = getattr(self, method, self.generic_visit)
        return visitor(node)

    def generic_visit(self, node):
        """Called if no explicit visitor function exists for a node.

        Visits every child node found in the node's fields, including nodes
        inside list-valued fields.  Return values of those visits are dropped.
        """
        for field, value in iter_fields(node):
            if isinstance(value, list):
                for item in value:
                    if isinstance(item, AST):
                        self.visit(item)
            elif isinstance(value, AST):
                self.visit(value)
353
354
class NodeTransformer(NodeVisitor):
    """
    A :class:`NodeVisitor` subclass that walks the abstract syntax tree and
    allows modification of nodes.

    The `NodeTransformer` will walk the AST and use the return value of the
    visitor methods to replace or remove the old node.  If the return value of
    the visitor method is ``None``, the node will be removed from its location,
    otherwise it is replaced with the return value.  The return value may be the
    original node in which case no replacement takes place.

    Here is an example transformer that rewrites all occurrences of name lookups
    (``foo``) to ``data['foo']``::

       class RewriteName(NodeTransformer):

           def visit_Name(self, node):
               return copy_location(Subscript(
                   value=Name(id='data', ctx=Load()),
                   slice=Index(value=Constant(value=node.id)),
                   ctx=node.ctx
               ), node)

    Keep in mind that if the node you're operating on has child nodes you must
    either transform the child nodes yourself or call the :meth:`generic_visit`
    method for the node first.

    For nodes that were part of a collection of statements (that applies to all
    statement nodes), the visitor may also return a list of nodes rather than
    just a single node.

    Usually you use the transformer like this::

       node = YourTransformer().visit(node)
    """

    def generic_visit(self, node):
        """Visit all children of *node*, applying replacements in place.

        Returns *node* itself so it can be used directly as a visitor result.
        """
        for field, old_value in iter_fields(node):
            if isinstance(old_value, list):
                new_values = []
                for value in old_value:
                    if isinstance(value, AST):
                        value = self.visit(value)
                        if value is None:
                            # Visitor asked for this item to be removed.
                            continue
                        elif not isinstance(value, AST):
                            # Visitor returned several nodes; splice them in.
                            new_values.extend(value)
                            continue
                    new_values.append(value)
                # Mutate the existing list so other references see the change.
                old_value[:] = new_values
            elif isinstance(old_value, AST):
                new_node = self.visit(old_value)
                if new_node is None:
                    delattr(node, field)
                else:
                    setattr(node, field, new_node)
        return node
Serhiy Storchaka3f228112018-09-27 17:42:37 +0300412
413
# The following code is for backward compatibility.
# It will be removed in future.

def _getter(self):
    """Read accessor for the legacy attributes: returns ``self.value``."""
    return self.value

def _setter(self, value):
    """Write accessor for the legacy attributes: stores into ``self.value``."""
    self.value = value

# Legacy attribute names kept as aliases of Constant.value.
Constant.n = property(_getter, _setter)
Constant.s = property(_getter, _setter)
425
class _ABC(type):
    """Metaclass that makes isinstance() on the legacy constant classes work.

    For a class listed in ``_const_types``, an object is an instance when it
    is a Constant whose value has one of the types listed for that class and
    none of the types listed for it in ``_const_types_not``.
    """

    def __instancecheck__(cls, inst):
        if not isinstance(inst, Constant):
            return False
        if cls in _const_types:
            try:
                value = inst.value
            except AttributeError:
                # Constant with no value set matches none of the legacy classes.
                return False
            else:
                return (
                    isinstance(value, _const_types[cls]) and
                    not isinstance(value, _const_types_not.get(cls, ()))
                )
        # Classes not in the table (e.g. subclasses) use the normal check.
        return type.__instancecheck__(cls, inst)
442
def _new(cls, *args, **kwargs):
    """Shared ``__new__`` for the legacy constant classes.

    Instantiating one of the classes listed in ``_const_types`` builds a plain
    Constant from the given arguments; any other class (such as a subclass)
    is created through ``Constant.__new__``.
    """
    if cls in _const_types:
        return Constant(*args, **kwargs)
    return Constant.__new__(cls, *args, **kwargs)
447
class Num(Constant, metaclass=_ABC):
    """Backward-compatibility class for numeric constants (field ``n``)."""
    _fields = ('n',)
    __new__ = _new
451
class Str(Constant, metaclass=_ABC):
    """Backward-compatibility class for str constants (field ``s``)."""
    _fields = ('s',)
    __new__ = _new
455
class Bytes(Constant, metaclass=_ABC):
    """Backward-compatibility class for bytes constants (field ``s``)."""
    _fields = ('s',)
    __new__ = _new
459
class NameConstant(Constant, metaclass=_ABC):
    """Backward-compatibility class for None and bool constants."""
    __new__ = _new
462
class Ellipsis(Constant, metaclass=_ABC):
    """Backward-compatibility class for the ``...`` constant.

    Within this module the name shadows the built-in ``Ellipsis``, so the
    literal ``...`` is used wherever the singleton itself is needed.
    """
    _fields = ()

    def __new__(cls, *args, **kwargs):
        if cls is Ellipsis:
            # No fields of its own: the value is always the ``...`` singleton.
            return Constant(..., *args, **kwargs)
        return Constant.__new__(cls, *args, **kwargs)
470
# Value types that make a Constant count as an instance of each legacy class.
_const_types = {
    Num: (int, float, complex),
    Str: (str,),
    Bytes: (bytes,),
    NameConstant: (type(None), bool),
    Ellipsis: (type(...),),
}
# Value types excluded per legacy class: bool is an int subclass, but a bool
# constant must not be reported as a Num.
_const_types_not = {
    Num: (bool,),
}