blob: 0d62c0828ccf67fd3520943590f09e4805be9ab6 [file] [log] [blame]
Guido van Rossum421c2241997-11-18 15:47:55 +00001"""Disassembler of Python byte code into mnemonics."""
Guido van Rossum217a5fa1990-12-26 15:40:07 +00002
3import sys
Guido van Rossum18aef3c1997-03-14 04:15:43 +00004import types
Nick Coghlanb39fd0c2013-05-06 23:59:20 +10005import collections
Guido van Rossum217a5fa1990-12-26 15:40:07 +00006
Skip Montanaro19c6ba32003-02-27 21:29:27 +00007from opcode import *
8from opcode import __all__ as _opcodes_all
9
# Public API: the entry points defined in this module followed by every
# name that the opcode module itself exports.
__all__ = [
    "code_info", "dis", "disassemble", "distb", "disco",
    "findlinestarts", "findlabels", "show_code",
    "get_instructions", "Instruction", "Bytecode",
]
__all__ += _opcodes_all
del _opcodes_all

# Types that dis() recognises as disassemblable members when it walks the
# namespace of a class or module.
_have_code = (types.MethodType, types.FunctionType, types.CodeType, type)
16
Nick Coghlan5c8b54e2010-07-03 07:36:51 +000017def _try_compile(source, name):
18 """Attempts to compile the given source, first as an expression and
19 then as a statement if the first approach fails.
20
21 Utility function to accept strings in functions that otherwise
22 expect code objects
23 """
Nick Coghlan5c8b54e2010-07-03 07:36:51 +000024 try:
25 c = compile(source, name, 'eval')
26 except SyntaxError:
27 c = compile(source, name, 'exec')
28 return c
29
def dis(x=None, *, file=None):
    """Disassemble classes, methods, functions, or code.

    With no argument, disassemble the last traceback.

    *x* may be a method, a function, a class or module (each member that
    is a method, function, code object or class is disassembled in sorted
    name order), a code object, raw bytecode (bytes or bytearray) or a
    string of source code.  Any other object raises TypeError.

    All output is written with print() to *file*; when *file* is None the
    default stream of print() is used.
    """
    if x is None:
        # Forward *file* so the traceback listing goes to the same stream.
        distb(file=file)
        return
    if hasattr(x, '__func__'):  # Method
        x = x.__func__
    if hasattr(x, '__code__'):  # Function
        x = x.__code__
    if hasattr(x, '__dict__'):  # Class or module
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name, file=file)
                try:
                    # Forward *file* so nested listings follow their
                    # "Disassembly of ..." header instead of going to the
                    # default stream.
                    dis(x1, file=file)
                except TypeError as msg:
                    print("Sorry:", msg, file=file)
                print(file=file)
    elif hasattr(x, 'co_code'):  # Code object
        disassemble(x, file=file)
    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
        _disassemble_bytes(x, file=file)
    elif isinstance(x, str):  # Source code
        _disassemble_str(x, file=file)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)
Guido van Rossum217a5fa1990-12-26 15:40:07 +000062
def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is omitted, sys.last_traceback is used and followed through
    its tb_next links to the final entry; RuntimeError is raised if
    sys.last_traceback does not exist.  The code object of the selected
    traceback's frame is disassembled to *file*, with tb_lasti passed as
    the current-instruction offset.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble")
        # Advance to the last traceback object in the chain.
        while tb.tb_next:
            tb = tb.tb_next
    frame_code = tb.tb_frame.f_code
    disassemble(frame_code, tb.tb_lasti, file=file)
Guido van Rossum217a5fa1990-12-26 15:40:07 +000072
# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
COMPILER_FLAG_NAMES = {
    0x01: "OPTIMIZED",
    0x02: "NEWLOCALS",
    0x04: "VARARGS",
    0x08: "VARKEYWORDS",
    0x10: "NESTED",
    0x20: "GENERATOR",
    0x40: "NOFREE",
}

def pretty_flags(flags):
    """Return pretty representation of code flags.

    The low 32 bits of *flags* are examined from least to most
    significant.  Each set bit contributes its COMPILER_FLAG_NAMES entry,
    or its hex value when it has no name.  If bits are still left after
    all 32 positions were examined (or *flags* was 0 to begin with), the
    hex value of what remains is appended.  The pieces are joined with
    ", ".
    """
    labels = []
    for bit in range(32):
        mask = 1 << bit
        if not flags & mask:
            continue
        labels.append(COMPILER_FLAG_NAMES.get(mask, hex(mask)))
        flags ^= mask
        if not flags:
            # Every set bit has been reported; skip the for-else below.
            break
    else:
        labels.append(hex(flags))
    return ", ".join(labels)
99
Nick Coghlanb39fd0c2013-05-06 23:59:20 +1000100def _get_code_object(x):
101 """Helper to handle methods, functions, strings and raw code objects"""
Nick Coghlaneae2da12010-08-17 08:03:36 +0000102 if hasattr(x, '__func__'): # Method
103 x = x.__func__
104 if hasattr(x, '__code__'): # Function
105 x = x.__code__
106 if isinstance(x, str): # Source code
Nick Coghlanb39fd0c2013-05-06 23:59:20 +1000107 x = _try_compile(x, "<disassembly>")
Nick Coghlaneae2da12010-08-17 08:03:36 +0000108 if hasattr(x, 'co_code'): # Code object
Nick Coghlanb39fd0c2013-05-06 23:59:20 +1000109 return x
110 raise TypeError("don't know how to disassemble %s objects" %
111 type(x).__name__)
112
def code_info(x):
    """Formatted details of methods, functions, or code.

    *x* is resolved to a code object with _get_code_object() and the
    multi-line text produced by _format_code_info() is returned.
    """
    code = _get_code_object(x)
    return _format_code_info(code)
Nick Coghlaneae2da12010-08-17 08:03:36 +0000116
def _format_code_info(co):
    """Return a multi-line text summary of the code object *co*.

    The summary starts with one line each for the name, filename,
    argument counts, number of locals, stack size and flags.  A headed,
    numbered listing follows for each of the constants, names, variable
    names, free variables and cell variables that is non-empty.
    """
    report = [
        "Name: %s" % co.co_name,
        "Filename: %s" % co.co_filename,
        "Argument count: %s" % co.co_argcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals: %s" % co.co_nlocals,
        "Stack size: %s" % co.co_stacksize,
        "Flags: %s" % pretty_flags(co.co_flags),
    ]
    # (heading, entries, per-entry format).  Constants are rendered with
    # repr(); every kind of name is rendered with str().
    sections = (
        ("Constants:", co.co_consts, "%4d: %r"),
        ("Names:", co.co_names, "%4d: %s"),
        ("Variable names:", co.co_varnames, "%4d: %s"),
        ("Free variables:", co.co_freevars, "%4d: %s"),
        ("Cell variables:", co.co_cellvars, "%4d: %s"),
    )
    for heading, entries, entry_fmt in sections:
        if entries:
            report.append(heading)
            for numbered in enumerate(entries):
                report.append(entry_fmt % numbered)
    return "\n".join(report)
147
def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    The text printed is the result of code_info(co).
    If *file* is not provided, the output is printed on stdout.
    """
    summary = code_info(co)
    print(summary, file=file)
Guido van Rossum3e1b85e2007-05-30 02:07:00 +0000154
_Instruction = collections.namedtuple("_Instruction",
     "opname opcode arg argval argrepr offset starts_line is_jump_target")

class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line

        Returns the columns joined by single spaces.  Every marker column
        has the same width whether or not its marker is shown, so that the
        offset and opcode columns line up from one instruction to the next.
        """
        current_marker = '-->'
        target_marker = '>>'
        fields = []
        # Column: Source code line number
        if lineno_width:
            if self.starts_line is not None:
                fields.append("%*d" % (lineno_width, self.starts_line))
            else:
                fields.append(' ' * lineno_width)
        # Column: Current instruction indicator
        if mark_as_current:
            fields.append(current_marker)
        else:
            # Blank placeholder as wide as the marker it stands in for.
            fields.append(' ' * len(current_marker))
        # Column: Jump target marker
        if self.is_jump_target:
            fields.append(target_marker)
        else:
            fields.append(' ' * len(target_marker))
        # Column: Instruction offset from start of code sequence
        fields.append(repr(self.offset).rjust(4))
        # Column: Opcode name
        fields.append(self.opname.ljust(20))
        # Column: Opcode argument
        if self.arg is not None:
            fields.append(repr(self.arg).rjust(5))
            # Column: Opcode argument details
            if self.argrepr:
                fields.append('(' + self.argrepr + ')')
        return ' '.join(fields)
207
208
def get_instructions(x, *, line_offset=0):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operation in the supplied code.

    The given line offset is added to the 'starts_line' attribute of any
    instructions that start a new line.
    """
    code = _get_code_object(x)
    return _get_instructions_bytes(
        code.co_code,
        code.co_varnames,
        code.co_names,
        code.co_consts,
        code.co_cellvars + code.co_freevars,
        dict(findlinestarts(code)),
        line_offset
    )
224
225def _get_const_info(const_index, const_list):
226 """Helper to get optional details about const references
227
228 Returns the dereferenced constant and its repr if the constant
229 list is defined.
230 Otherwise returns the constant index and its repr().
231 """
232 argval = const_index
233 if const_list is not None:
234 argval = const_list[const_index]
235 return argval, repr(argval)
236
237def _get_name_info(name_index, name_list):
238 """Helper to get optional details about named references
239
240 Returns the dereferenced name as both value and repr if the name
241 list is defined.
242 Otherwise returns the name index and its repr().
243 """
244 argval = name_index
245 if name_list is not None:
246 argval = name_list[name_index]
247 argrepr = argval
248 else:
249 argrepr = repr(argval)
250 return argval, argrepr
251
252
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                      cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *constants*, *names*, *varnames* and *cells* are the sequences used to
    resolve the operands of constant, name, local-variable and free/cell
    variable opcodes; when one of them is None the raw operand index is
    reported instead.  *linestarts* maps instruction offsets to source
    line numbers, and *line_offset* is added to every line number taken
    from it.
    """
    # A set keeps the per-instruction jump-target lookup O(1).
    labels = set(findlabels(code))
    extended_arg = 0
    starts_line = None
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        offset = i
        if linestarts is not None:
            starts_line = linestarts.get(i, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = i in labels
        i = i+1
        arg = None
        argval = None
        argrepr = ''
        if op >= HAVE_ARGUMENT:
            # Two-byte little-endian operand, plus any high bits supplied
            # by a preceding EXTENDED_ARG instruction.
            arg = code[i] + code[i+1]*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                extended_arg = arg*65536
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                argval = i + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op in hasnargs:
                # Low operand byte, then high operand byte.
                argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1])
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)
313
def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    The listing for *co* is printed to *file*.  *lasti* is passed on as
    the offset of the instruction to mark as current; the default of -1
    matches no instruction.
    """
    line_table = dict(findlinestarts(co))
    closure_names = co.co_cellvars + co.co_freevars
    _disassemble_bytes(co.co_code, lasti, co.co_varnames, co.co_names,
                       co.co_consts, closure_names, line_table, file=file)
Guido van Rossum217a5fa1990-12-26 15:40:07 +0000320
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None):
    """Print a disassembly listing of the bytecode string *code* to *file*.

    *varnames*, *names*, *constants*, *cells* and *linestarts* are passed
    through to _get_instructions_bytes().  The instruction whose offset
    equals *lasti* is marked as the current one.  A blank line is printed
    before each instruction that starts a new source line, except the one
    at offset 0.
    """
    # Omit the line number column entirely if we have no line number info
    show_lineno = linestarts is not None
    if show_lineno:
        # Never narrower than three columns, but wide enough for the
        # largest line number so that later columns stay aligned.
        largest = max(linestarts.values()) if linestarts else 0
        lineno_width = max(3, len(str(largest)))
    else:
        lineno_width = 0
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts):
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr), file=file)
Skip Montanaro19c6ba32003-02-27 21:29:27 +0000337
def _disassemble_str(source, *, file=None):
    """Compile the source string, then disassemble the code object.

    The source is compiled under the filename '<dis>' and the listing is
    printed to *file*.
    """
    code = _try_compile(source, '<dis>')
    disassemble(code, file=file)

disco = disassemble                     # XXX For backwards compatibility
Guido van Rossumbd307951997-01-17 20:05:04 +0000343
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, each listed once, in the order in which
    they are first referenced.

    Operands are widened by any preceding EXTENDED_ARG instruction, in the
    same way as when the instructions themselves are decoded, so targets
    that need more than 16 bits are reported correctly.
    """
    labels = []
    seen = set()  # O(1) duplicate check; `labels` preserves the order
    extended_arg = 0
    # enumerate() is not an option, since we sometimes process
    # multiple elements on a single pass through the loop
    n = len(code)
    i = 0
    while i < n:
        op = code[i]
        i = i+1
        if op >= HAVE_ARGUMENT:
            arg = code[i] + code[i+1]*256 + extended_arg
            extended_arg = 0
            i = i+2
            if op == EXTENDED_ARG:
                # Supplies the high bits of the next instruction's operand.
                extended_arg = arg*65536
            label = -1
            if op in hasjrel:
                # Relative jumps are measured from the next instruction.
                label = i+arg
            elif op in hasjabs:
                label = arg
            if label >= 0 and label not in seen:
                seen.add(label)
                labels.append(label)
    return labels
Guido van Rossum217a5fa1990-12-26 15:40:07 +0000370
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    code.co_lnotab is read as alternating (offset increment, line
    increment) values, starting from offset 0 and code.co_firstlineno.
    A pair is emitted only when the line number differs from the one
    emitted last.
    """
    table = code.co_lnotab
    steps = zip(table[0::2], table[1::2])

    reported = None
    line = code.co_firstlineno
    offset = 0
    for offset_step, line_step in steps:
        if offset_step:
            # The offset is about to move on: report the line that the
            # current offset belongs to, unless it was already reported.
            if line != reported:
                yield (offset, line)
                reported = line
            offset += offset_step
        line += line_step
    if line != reported:
        yield (offset, line)
Guido van Rossum1fdae122000-02-04 17:47:55 +0000392
class Bytecode:
    """The bytecode operations of a piece of code

    Instantiate this with a function, method, string of code, or a code object
    (as returned by compile()).

    Iterating over this yields the bytecode operations as Instruction instances.
    """
    def __init__(self, x):
        code = _get_code_object(x)
        self.codeobj = code
        self.cell_names = code.co_cellvars + code.co_freevars
        self.linestarts = dict(findlinestarts(code))
        self.line_offset = 0
        # Kept only so that repr() can show what the caller passed in.
        self.original_object = x

    def __iter__(self):
        code = self.codeobj
        return _get_instructions_bytes(code.co_code, code.co_varnames,
                                       code.co_names, code.co_consts,
                                       self.cell_names, self.linestarts,
                                       self.line_offset)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self.original_object)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def show_info(self, *, file=None):
        """Print the information about the code object as returned by info()."""
        details = self.info()
        print(details, file=file)

    def display_code(self, *, file=None):
        """Print a formatted view of the bytecode operations.

        The listing is written to *file*; the result of the underlying
        _disassemble_bytes() call is returned.
        """
        code = self.codeobj
        return _disassemble_bytes(
            code.co_code,
            varnames=code.co_varnames,
            names=code.co_names,
            constants=code.co_consts,
            cells=self.cell_names,
            linestarts=self.linestarts,
            file=file
        )
435
436
Guido van Rossum1fdae122000-02-04 17:47:55 +0000437def _test():
Tim Peters88869f92001-01-14 23:36:06 +0000438 """Simple test program to disassemble a file."""
439 if sys.argv[1:]:
440 if sys.argv[2:]:
441 sys.stderr.write("usage: python dis.py [-|file]\n")
442 sys.exit(2)
443 fn = sys.argv[1]
444 if not fn or fn == "-":
445 fn = None
446 else:
447 fn = None
Raymond Hettinger0f4940c2002-06-01 00:57:55 +0000448 if fn is None:
Tim Peters88869f92001-01-14 23:36:06 +0000449 f = sys.stdin
450 else:
451 f = open(fn)
452 source = f.read()
Raymond Hettinger0f4940c2002-06-01 00:57:55 +0000453 if fn is not None:
Tim Peters88869f92001-01-14 23:36:06 +0000454 f.close()
455 else:
456 fn = "<stdin>"
457 code = compile(source, fn, "exec")
458 dis(code)
Guido van Rossum1fdae122000-02-04 17:47:55 +0000459
460if __name__ == "__main__":
Tim Peters88869f92001-01-14 23:36:06 +0000461 _test()