blob: ca4094c1bb7f3453647b45094db4734e20e602e6 [file] [log] [blame]
Guido van Rossum421c2241997-11-18 15:47:55 +00001"""Disassembler of Python byte code into mnemonics."""
Guido van Rossum217a5fa1990-12-26 15:40:07 +00002
3import sys
Guido van Rossum18aef3c1997-03-14 04:15:43 +00004import types
Nick Coghlanb39fd0c2013-05-06 23:59:20 +10005import collections
Guido van Rossum217a5fa1990-12-26 15:40:07 +00006
Skip Montanaro19c6ba32003-02-27 21:29:27 +00007from opcode import *
8from opcode import __all__ as _opcodes_all
9
# Public names of this module: the disassembly helpers defined below plus
# every name exported by the opcode module (imported above as _opcodes_all).
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
           "findlinestarts", "findlabels", "show_code",
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
# The temporary alias is only needed to build __all__.
del _opcodes_all

# Types of the objects that dis() disassembles when it walks the __dict__
# of a class or module.
_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
16
def _try_compile(source, name):
    """Compile *source*, preferring expression mode over statement mode.

    The source is first compiled in 'eval' mode; if that raises
    SyntaxError it is compiled again in 'exec' mode.  *name* is passed to
    compile() as the filename.

    This lets functions that normally expect code objects accept source
    strings as well.
    """
    try:
        return compile(source, name, 'eval')
    except SyntaxError:
        # Not a single expression: treat it as a sequence of statements.
        return compile(source, name, 'exec')
29
def dis(x=None, *, file=None):
    """Disassemble classes, methods, functions, or code.

    With no argument, disassemble the last traceback.

    *x* may be a method, a function, a class or module (every member of
    its __dict__ whose type is listed in _have_code is disassembled in
    sorted name order), a code object, raw bytecode (bytes or bytearray)
    or a string of source code.  Any other object raises TypeError.

    All output is written with print(..., file=file).
    """
    if x is None:
        # Forward the output stream so the traceback listing goes to the
        # same place as every other kind of disassembly.
        distb(file=file)
        return
    if hasattr(x, '__func__'):  # Method
        x = x.__func__
    if hasattr(x, '__code__'):  # Function
        x = x.__code__
    if hasattr(x, '__dict__'):  # Class or module
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name, file=file)
                try:
                    # Pass *file* down; otherwise the member's listing
                    # would be printed to stdout while its header went
                    # to *file*.
                    dis(x1, file=file)
                except TypeError as msg:
                    print("Sorry:", msg, file=file)
                print(file=file)
    elif hasattr(x, 'co_code'):  # Code object
        disassemble(x, file=file)
    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
        _disassemble_bytes(x, file=file)
    elif isinstance(x, str):  # Source code
        _disassemble_str(x, file=file)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)
Guido van Rossum217a5fa1990-12-26 15:40:07 +000062
def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is omitted, sys.last_traceback is used and followed to its
    innermost entry; RuntimeError is raised if no such traceback exists.
    The code object of the traceback's frame is disassembled with the
    instruction at tb_lasti marked as current, writing to *file*.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            # The AttributeError is an implementation detail; suppress it
            # so callers see only the meaningful RuntimeError.
            raise RuntimeError("no last traceback to disassemble") from None
        while tb.tb_next:
            tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
Guido van Rossum217a5fa1990-12-26 15:40:07 +000072
# Maps each code-object flag bit to its symbolic name.  The inspect module
# reads this dictionary to build its list of CO_* constants, and
# pretty_flags uses it to render a co_flags value in readable form.
COMPILER_FLAG_NAMES = {
    0x01: "OPTIMIZED",
    0x02: "NEWLOCALS",
    0x04: "VARARGS",
    0x08: "VARKEYWORDS",
    0x10: "NESTED",
    0x20: "GENERATOR",
    0x40: "NOFREE",
}
85
def pretty_flags(flags):
    """Return a comma-separated, human readable rendering of *flags*.

    The low 32 bits are examined from least to most significant.  Each set
    bit is shown by its COMPILER_FLAG_NAMES entry, or by its hex value if
    it has no name.  If all 32 bits have been examined without the
    remaining value reaching zero, that remainder is appended in hex.
    """
    remaining = flags
    labels = []
    for bit in range(32):
        mask = 1 << bit
        if not remaining & mask:
            continue
        labels.append(COMPILER_FLAG_NAMES.get(mask, hex(mask)))
        remaining ^= mask
        if not remaining:
            # Every set bit has been reported.
            break
    else:
        # Loop ran to completion: report whatever was not consumed.
        labels.append(hex(remaining))
    return ", ".join(labels)
99
def _get_code_object(x):
    """Return the code object behind a method, function, source string
    or code object.

    A method is unwrapped through __func__, a function through __code__,
    and a string is compiled with _try_compile.  TypeError is raised if
    the result does not have a co_code attribute.
    """
    obj = getattr(x, '__func__', x)        # Method -> function
    obj = getattr(obj, '__code__', obj)    # Function -> code
    if isinstance(obj, str):               # Source code
        obj = _try_compile(obj, "<disassembly>")
    if not hasattr(obj, 'co_code'):
        raise TypeError("don't know how to disassemble %s objects" %
                        type(obj).__name__)
    return obj
112
def code_info(x):
    """Return a formatted, multi-line description of the code object
    behind a method, function, source string or code object."""
    code = _get_code_object(x)
    return _format_code_info(code)
Nick Coghlaneae2da12010-08-17 08:03:36 +0000116
def _format_code_info(co):
    """Build the text report for code object *co*.

    The report starts with one line per scalar attribute (name, filename,
    argument counts, locals, stack size, flags).  It is followed by an
    indexed listing of each non-empty tuple among the constants, names,
    variable names, free variables and cell variables.  The lines are
    joined with newlines.
    """
    report = [
        "Name: %s" % co.co_name,
        "Filename: %s" % co.co_filename,
        "Argument count: %s" % co.co_argcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals: %s" % co.co_nlocals,
        "Stack size: %s" % co.co_stacksize,
        "Flags: %s" % pretty_flags(co.co_flags),
    ]
    # (heading, entries, per-entry format); constants are shown with
    # repr(), every other table holds plain names.
    sections = (
        ("Constants:", co.co_consts, "%4d: %r"),
        ("Names:", co.co_names, "%4d: %s"),
        ("Variable names:", co.co_varnames, "%4d: %s"),
        ("Free variables:", co.co_freevars, "%4d: %s"),
        ("Cell variables:", co.co_cellvars, "%4d: %s"),
    )
    for heading, entries, entry_fmt in sections:
        if not entries:
            continue
        report.append(heading)
        for indexed in enumerate(entries):
            report.append(entry_fmt % indexed)
    return "\n".join(report)
147
def show_code(co, *, file=None):
    """Print the code_info() report for *co*.

    The report is written with print(..., file=file).
    """
    report = code_info(co)
    print(report, file=file)
Guido van Rossum3e1b85e2007-05-30 02:07:00 +0000151
_Instruction = collections.namedtuple("_Instruction",
     "opname opcode arg argval argrepr offset starts_line is_jump_target")

class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line

        Returns the fields joined by single spaces.  The marker columns
        have the same width whether or not a marker is shown, so the
        offset and opcode columns line up across instructions.
        """
        fields = []
        # Column: Source code line number
        if lineno_width:
            if self.starts_line is not None:
                lineno_fmt = "%%%dd" % lineno_width
                fields.append(lineno_fmt % self.starts_line)
            else:
                fields.append(' ' * lineno_width)
        # Column: Current instruction indicator
        # (placeholder is as wide as the '-->' marker)
        if mark_as_current:
            fields.append('-->')
        else:
            fields.append('   ')
        # Column: Jump target marker
        # (placeholder is as wide as the '>>' marker)
        if self.is_jump_target:
            fields.append('>>')
        else:
            fields.append('  ')
        # Column: Instruction offset from start of code sequence
        fields.append(repr(self.offset).rjust(4))
        # Column: Opcode name
        fields.append(self.opname.ljust(20))
        # Column: Opcode argument
        if self.arg is not None:
            fields.append(repr(self.arg).rjust(5))
            # Column: Opcode argument details
            if self.argrepr:
                fields.append('(' + self.argrepr + ')')
        return ' '.join(fields)
204
205
def get_instructions(x, *, line_offset=0):
    """Iterate over the opcodes of a method, function, source string or
    code object.

    Produces one Instruction named tuple per operation in the supplied
    code.

    *line_offset* is added to the 'starts_line' attribute of every
    instruction that starts a new source line.
    """
    code = _get_code_object(x)
    closure_names = code.co_cellvars + code.co_freevars
    line_map = dict(findlinestarts(code))
    return _get_instructions_bytes(
        code.co_code,
        varnames=code.co_varnames,
        names=code.co_names,
        constants=code.co_consts,
        cells=closure_names,
        linestarts=line_map,
        line_offset=line_offset,
    )
221
def _get_const_info(const_index, const_list):
    """Resolve a constant reference for display.

    With a constant list, returns (constant, repr(constant)) for the
    entry at *const_index*.  Without one (*const_list* is None), returns
    (const_index, repr(const_index)).
    """
    if const_list is None:
        return const_index, repr(const_index)
    value = const_list[const_index]
    return value, repr(value)
233
def _get_name_info(name_index, name_list):
    """Resolve a name reference for display.

    With a name list, returns the entry at *name_index* as both the value
    and its display text.  Without one (*name_list* is None), returns
    (name_index, repr(name_index)).
    """
    if name_list is None:
        return name_index, repr(name_index)
    name = name_list[name_index]
    return name, name
248
249
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                            cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *code* is the raw bytecode.  *varnames*, *names*, *constants* and
    *cells* are the tables used to resolve local, name, constant and
    free/cell arguments; when a table is None the raw index is reported
    instead.  *linestarts* maps bytecode offsets to line numbers, and
    *line_offset* is added to every line number taken from it.
    """
    # A set gives constant-time "is this offset a jump target?" checks;
    # the list returned by findlabels would be scanned per instruction.
    labels = set(findlabels(code))
    extended_arg = 0
    starts_line = None
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        offset = i
        if linestarts is not None:
            starts_line = linestarts.get(i, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = i in labels
        i = i+1
        arg = None
        argval = None
        argrepr = ''
        if op >= HAVE_ARGUMENT:
            # Two-byte little-endian argument, plus any high bits supplied
            # by a preceding EXTENDED_ARG instruction.
            arg = code[i] + code[i+1]*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                extended_arg = arg*65536
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                argval = i + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op in hasnargs:
                # The two argument bytes are reported separately.
                argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1])
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)
310
def disassemble(co, lasti=-1, *, file=None):
    """Disassemble the code object *co*, writing the listing to *file*.

    The instruction whose offset equals *lasti* is marked as current.
    """
    closure_names = co.co_cellvars + co.co_freevars
    line_map = dict(findlinestarts(co))
    _disassemble_bytes(
        co.co_code,
        lasti,
        varnames=co.co_varnames,
        names=co.co_names,
        constants=co.co_consts,
        cells=closure_names,
        linestarts=line_map,
        file=file,
    )
Guido van Rossum217a5fa1990-12-26 15:40:07 +0000317
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None):
    """Print a disassembly listing of the raw bytecode *code* to *file*.

    The optional tables are passed to _get_instructions_bytes to resolve
    arguments.  The instruction at offset *lasti* is marked as current.
    A blank line separates source lines, except before the instruction at
    offset 0.
    """
    # Without line number information the line number column is dropped.
    have_lines = linestarts is not None
    # TODO?: Adjust width upwards if max(linestarts.values()) >= 1000?
    width = 3 if have_lines else 0
    instructions = _get_instructions_bytes(code, varnames, names,
                                           constants, cells, linestarts)
    for ins in instructions:
        if have_lines and ins.starts_line is not None and ins.offset > 0:
            print(file=file)
        print(ins._disassemble(width, ins.offset == lasti), file=file)
Skip Montanaro19c6ba32003-02-27 21:29:27 +0000334
def _disassemble_str(source, *, file=None):
    """Compile *source* with _try_compile (filename '<dis>') and
    disassemble the resulting code object to *file*."""
    code = _try_compile(source, '<dis>')
    disassemble(code, file=file)
Nick Coghlan5c8b54e2010-07-03 07:36:51 +0000338
# XXX Alias of disassemble(), kept for backwards compatibility.
disco = disassemble
Guido van Rossumbd307951997-01-17 20:05:04 +0000340
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, each listed once, in the order in which
    the jumps that reference them appear in *code*.

    EXTENDED_ARG prefixes are folded into the argument of the following
    instruction, in the same way as _get_instructions_bytes does, so that
    targets of jumps with arguments above 65535 are reported correctly.
    """
    labels = []
    seen = set()  # constant-time duplicate check; *labels* keeps the order
    extended_arg = 0
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        i = i+1
        if op >= HAVE_ARGUMENT:
            arg = code[i] + code[i+1]*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                # High bits for the argument of the next instruction.
                extended_arg = arg*65536
            label = -1
            if op in hasjrel:
                # Relative jumps are measured from the next instruction.
                label = i+arg
            elif op in hasjabs:
                label = arg
            if label >= 0 and label not in seen:
                seen.add(label)
                labels.append(label)
    return labels
Guido van Rossum217a5fa1990-12-26 15:40:07 +0000367
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    co_lnotab is read as alternating (offset increment, line increment)
    bytes, starting from offset 0 and line co_firstlineno.
    """
    lnotab = code.co_lnotab
    reported = None
    line = code.co_firstlineno
    offset = 0
    for addr_step, line_step in zip(lnotab[0::2], lnotab[1::2]):
        if addr_step:
            # The offset is about to advance: emit the current line if it
            # has not been reported yet.
            if line != reported:
                yield (offset, line)
                reported = line
            offset += addr_step
        line += line_step
    if line != reported:
        yield (offset, line)
Guido van Rossum1fdae122000-02-04 17:47:55 +0000389
class Bytecode:
    """The bytecode operations of a piece of code

    Create an instance from a function, method, string of source code, or
    a code object (as returned by compile()).

    Iterating over an instance yields its bytecode operations as
    Instruction instances.
    """
    def __init__(self, x):
        code = _get_code_object(x)
        self.codeobj = code
        self.cell_names = code.co_cellvars + code.co_freevars
        self.linestarts = dict(findlinestarts(code))
        self.line_offset = 0
        self.original_object = x

    def __iter__(self):
        code = self.codeobj
        return _get_instructions_bytes(
            code.co_code,
            varnames=code.co_varnames,
            names=code.co_names,
            constants=code.co_consts,
            cells=self.cell_names,
            linestarts=self.linestarts,
            line_offset=self.line_offset,
        )

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.original_object)

    def info(self):
        """Return the formatted description of the wrapped code object."""
        return _format_code_info(self.codeobj)

    def show_info(self, *, file=None):
        """Print the text returned by info() to *file*."""
        print(self.info(), file=file)

    def display_code(self, *, file=None):
        """Print a formatted listing of the bytecode operations to *file*."""
        code = self.codeobj
        return _disassemble_bytes(
            code.co_code,
            varnames=code.co_varnames,
            names=code.co_names,
            constants=code.co_consts,
            cells=self.cell_names,
            linestarts=self.linestarts,
            file=file,
        )
432
433
def _test():
    """Simple test program to disassemble a file.

    Reads Python source from the file named by the single command-line
    argument, or from standard input when the argument is missing, empty
    or "-", then compiles it in 'exec' mode and disassembles the result.
    With more than one argument, prints a usage message to stderr and
    exits with status 2.
    """
    args = sys.argv[1:]
    if len(args) > 1:
        sys.stderr.write("usage: python dis.py [-|file]\n")
        sys.exit(2)
    fn = args[0] if args else None
    if not fn or fn == "-":
        source = sys.stdin.read()
        fn = "<stdin>"
    else:
        # The with-statement closes the file even if reading fails.
        with open(fn) as f:
            source = f.read()
    code = compile(source, fn, "exec")
    dis(code)
Guido van Rossum1fdae122000-02-04 17:47:55 +0000456
# Run the command-line test driver only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    _test()