Guido van Rossum | 421c224 | 1997-11-18 15:47:55 +0000 | [diff] [blame] | 1 | """Disassembler of Python byte code into mnemonics.""" |
Guido van Rossum | 217a5fa | 1990-12-26 15:40:07 +0000 | [diff] [blame] | 2 | |
| 3 | import sys |
Guido van Rossum | 18aef3c | 1997-03-14 04:15:43 +0000 | [diff] [blame] | 4 | import types |
Nick Coghlan | b39fd0c | 2013-05-06 23:59:20 +1000 | [diff] [blame] | 5 | import collections |
Nick Coghlan | 90b8e7d | 2013-11-06 22:08:36 +1000 | [diff] [blame] | 6 | import io |
Guido van Rossum | 217a5fa | 1990-12-26 15:40:07 +0000 | [diff] [blame] | 7 | |
Skip Montanaro | 19c6ba3 | 2003-02-27 21:29:27 +0000 | [diff] [blame] | 8 | from opcode import * |
| 9 | from opcode import __all__ as _opcodes_all |
| 10 | |
Nick Coghlan | 7646f7e | 2010-09-10 12:24:24 +0000 | [diff] [blame] | 11 | __all__ = ["code_info", "dis", "disassemble", "distb", "disco", |
Nick Coghlan | b39fd0c | 2013-05-06 23:59:20 +1000 | [diff] [blame] | 12 | "findlinestarts", "findlabels", "show_code", |
| 13 | "get_instructions", "Instruction", "Bytecode"] + _opcodes_all |
Skip Montanaro | 19c6ba3 | 2003-02-27 21:29:27 +0000 | [diff] [blame] | 14 | del _opcodes_all |
Skip Montanaro | e99d5ea | 2001-01-20 19:54:20 +0000 | [diff] [blame] | 15 | |
Serhiy Storchaka | 585c93d | 2016-04-23 09:23:52 +0300 | [diff] [blame] | 16 | _have_code = (types.MethodType, types.FunctionType, types.CodeType, |
| 17 | classmethod, staticmethod, type) |
Benjamin Peterson | 6ef9a84 | 2010-04-04 23:26:50 +0000 | [diff] [blame] | 18 | |
Serhiy Storchaka | dd102f7 | 2016-10-08 12:34:25 +0300 | [diff] [blame] | 19 | FORMAT_VALUE = opmap['FORMAT_VALUE'] |
Serhiy Storchaka | e2732d3 | 2018-03-11 11:07:06 +0200 | [diff] [blame] | 20 | FORMAT_VALUE_CONVERTERS = ( |
| 21 | (None, ''), |
| 22 | (str, 'str'), |
| 23 | (repr, 'repr'), |
| 24 | (ascii, 'ascii'), |
| 25 | ) |
| 26 | MAKE_FUNCTION = opmap['MAKE_FUNCTION'] |
| 27 | MAKE_FUNCTION_FLAGS = ('defaults', 'kwdefaults', 'annotations', 'closure') |
| 28 | |
Serhiy Storchaka | dd102f7 | 2016-10-08 12:34:25 +0300 | [diff] [blame] | 29 | |
def _try_compile(source, name):
    """Compile *source*, preferring expression mode over statement mode.

    The source is first compiled in 'eval' mode.  If that raises
    SyntaxError it is compiled again in 'exec' mode, and any error from
    that second attempt propagates to the caller.

    Utility function that lets functions which otherwise expect code
    objects accept source strings as well.  *name* is used as the
    filename recorded in the resulting code object.
    """
    try:
        return compile(source, name, 'eval')
    except SyntaxError:
        # Not a valid expression: fall back to treating it as statements.
        return compile(source, name, 'exec')
| 42 | |
def dis(x=None, *, file=None, depth=None):
    """Disassemble classes, methods, functions, and other compiled objects.

    With no argument, disassemble the last traceback.

    Compiled objects currently include generator objects, async generator
    objects, and coroutine objects, all of which store their code object
    in a special attribute.

    *file* is forwarded to every print() call and helper used here.
    *depth* is forwarded to the recursive disassembly of code objects and
    source strings.  Raises TypeError for objects that cannot be
    disassembled.
    """
    if x is None:
        distb(file=file)
        return

    # Methods wrap the underlying function in __func__.
    x = getattr(x, '__func__', x)

    # Functions, generators, async generators and coroutines each keep
    # their code object in a differently named attribute; use the first
    # one that is present.
    for code_attr in ('__code__', 'gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, code_attr):
            x = getattr(x, code_attr)
            break

    # Perform the disassembly.
    if hasattr(x, '__dict__'):
        # Class or module: disassemble every member that carries code.
        for name, member in sorted(x.__dict__.items()):
            if not isinstance(member, _have_code):
                continue
            print("Disassembly of %s:" % name, file=file)
            try:
                dis(member, file=file, depth=depth)
            except TypeError as msg:
                print("Sorry:", msg, file=file)
            print(file=file)
        return

    if hasattr(x, 'co_code'):
        # Code object
        _disassemble_recursive(x, file=file, depth=depth)
        return

    if isinstance(x, (bytes, bytearray)):
        # Raw bytecode
        _disassemble_bytes(x, file=file)
        return

    if isinstance(x, str):
        # Source code
        _disassemble_str(x, file=file, depth=depth)
        return

    raise TypeError("don't know how to disassemble %s objects" %
                    type(x).__name__)
Guido van Rossum | 217a5fa | 1990-12-26 15:40:07 +0000 | [diff] [blame] | 87 | |
def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is omitted, sys.last_traceback is used and followed to its
    innermost entry; RuntimeError is raised if no last traceback has
    been recorded.  Output goes to *file*.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            raise RuntimeError("no last traceback to disassemble") from None
        # Walk down to the frame where the exception was actually raised.
        while tb.tb_next:
            tb = tb.tb_next
    frame_code = tb.tb_frame.f_code
    disassemble(frame_code, tb.tb_lasti, file=file)
Guido van Rossum | 217a5fa | 1990-12-26 15:40:07 +0000 | [diff] [blame] | 97 | |
# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
# turn the co_flags field into a human readable list.
COMPILER_FLAG_NAMES = {
    0x0001: "OPTIMIZED",
    0x0002: "NEWLOCALS",
    0x0004: "VARARGS",
    0x0008: "VARKEYWORDS",
    0x0010: "NESTED",
    0x0020: "GENERATOR",
    0x0040: "NOFREE",
    0x0080: "COROUTINE",
    0x0100: "ITERABLE_COROUTINE",
    0x0200: "ASYNC_GENERATOR",
}

def pretty_flags(flags):
    """Return pretty representation of code flags.

    Each of the low 32 bits set in *flags* is rendered by its name from
    COMPILER_FLAG_NAMES, or in hex when it has no name.  Whatever is
    left after those 32 bits have been examined (including a zero
    *flags*) is appended in hex.  The pieces are joined with ", ".
    """
    labels = []
    bit = 1
    for _ in range(32):
        if flags & bit:
            labels.append(COMPILER_FLAG_NAMES.get(bit, hex(bit)))
            flags ^= bit
            if not flags:
                # Every set bit has been named; nothing is left over.
                return ", ".join(labels)
        bit <<= 1
    # Reached only when the scan never consumed the last set bit.
    labels.append(hex(flags))
    return ", ".join(labels)
| 127 | |
def _get_code_object(x):
    """Helper to handle methods, compiled or raw code objects, and strings.

    Returns the code object behind *x*, compiling *x* first when it is a
    source string.  Raises TypeError when no code object can be found.
    """
    # Extract functions from methods.
    x = getattr(x, '__func__', x)

    # Functions, generators, async generators and coroutines each keep
    # their code object in a differently named attribute; use the first
    # one that is present.
    for code_attr in ('__code__', 'gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, code_attr):
            x = getattr(x, code_attr)
            break

    # Handle source code.
    if isinstance(x, str):
        x = _try_compile(x, "<disassembly>")

    # By now, if we don't have a code object, we can't disassemble x.
    if not hasattr(x, 'co_code'):
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)
    return x
| 150 | |
def code_info(x):
    """Formatted details of methods, functions, or code.

    Resolves *x* to a code object and returns its multi-line summary
    as a single string.
    """
    code = _get_code_object(x)
    return _format_code_info(code)
Nick Coghlan | eae2da1 | 2010-08-17 08:03:36 +0000 | [diff] [blame] | 154 | |
def _format_code_info(co):
    """Return a multi-line, human readable summary of code object *co*.

    The summary starts with one line per scalar attribute, followed by
    an indexed listing for each of the constants, names, variable names,
    free variables and cell variables that is non-empty.
    """
    lines = [
        "Name: %s" % co.co_name,
        "Filename: %s" % co.co_filename,
        "Argument count: %s" % co.co_argcount,
        "Positional-only arguments: %s" % co.co_posonlyargcount,
        "Kw-only arguments: %s" % co.co_kwonlyargcount,
        "Number of locals: %s" % co.co_nlocals,
        "Stack size: %s" % co.co_stacksize,
        "Flags: %s" % pretty_flags(co.co_flags),
    ]
    # (heading, values, per-entry format); constants are shown with
    # repr(), every kind of name with str().
    sections = (
        ("Constants:", co.co_consts, "%4d: %r"),
        ("Names:", co.co_names, "%4d: %s"),
        ("Variable names:", co.co_varnames, "%4d: %s"),
        ("Free variables:", co.co_freevars, "%4d: %s"),
        ("Cell variables:", co.co_cellvars, "%4d: %s"),
    )
    for heading, values, entry_format in sections:
        if values:
            lines.append(heading)
            lines.extend(entry_format % pair for pair in enumerate(values))
    return "\n".join(lines)
| 186 | |
def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    If *file* is not provided, the output is printed on stdout.
    """
    summary = code_info(co)
    print(summary, file=file)
Guido van Rossum | 3e1b85e | 2007-05-30 02:07:00 +0000 | [diff] [blame] | 193 | |
_Instruction = collections.namedtuple("_Instruction",
     "opname opcode arg argval argrepr offset starts_line is_jump_target")

_Instruction.opname.__doc__ = "Human readable name for operation"
_Instruction.opcode.__doc__ = "Numeric code for operation"
_Instruction.arg.__doc__ = "Numeric argument to operation (if any), otherwise None"
_Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
_Instruction.argrepr.__doc__ = "Human readable description of operation argument"
_Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
_Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
_Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"

_OPNAME_WIDTH = 20
_OPARG_WIDTH = 5

class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line
        *offset_width* sets the width of the instruction offset field

        Returns the formatted line with trailing whitespace removed.
        """
        current_marker = '-->'
        jump_marker = '>>'
        fields = []
        # Column: Source code line number
        if lineno_width:
            if self.starts_line is not None:
                fields.append("%*d" % (lineno_width, self.starts_line))
            else:
                fields.append(' ' * lineno_width)
        # Column: Current instruction indicator.  The blank placeholder is
        # exactly as wide as the marker so later columns stay aligned
        # whether or not the row is marked.
        if mark_as_current:
            fields.append(current_marker)
        else:
            fields.append(' ' * len(current_marker))
        # Column: Jump target marker (placeholder padded the same way)
        if self.is_jump_target:
            fields.append(jump_marker)
        else:
            fields.append(' ' * len(jump_marker))
        # Column: Instruction offset from start of code sequence
        fields.append(repr(self.offset).rjust(offset_width))
        # Column: Opcode name
        fields.append(self.opname.ljust(_OPNAME_WIDTH))
        # Column: Opcode argument
        if self.arg is not None:
            fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
            # Column: Opcode argument details
            if self.argrepr:
                fields.append('(' + self.argrepr + ')')
        return ' '.join(fields).rstrip()
Nick Coghlan | b39fd0c | 2013-05-06 23:59:20 +1000 | [diff] [blame] | 259 | |
| 260 | |
def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operation in the supplied code.

    If *first_line* is not None, it indicates the line number that should
    be reported for the first source line in the disassembled code.
    Otherwise, the source line information (if any) is taken directly from
    the disassembled code object.
    """
    co = _get_code_object(x)
    closure_names = co.co_cellvars + co.co_freevars
    line_table = dict(findlinestarts(co))
    # Shift reported line numbers so the first line becomes *first_line*.
    shift = 0 if first_line is None else first_line - co.co_firstlineno
    return _get_instructions_bytes(co.co_code, co.co_varnames, co.co_names,
                                   co.co_consts, closure_names, line_table,
                                   shift)
| 282 | |
def _get_const_info(const_index, const_list):
    """Helper to get optional details about const references

       Returns a (value, repr) pair.  When *const_list* is given, the
       value is the constant found at *const_index*; otherwise the value
       is the index itself.  The second item is always repr() of the value.
    """
    if const_list is None:
        return const_index, repr(const_index)
    constant = const_list[const_index]
    return constant, repr(constant)
| 294 | |
def _get_name_info(name_index, name_list):
    """Helper to get optional details about named references

       Returns a (value, repr) pair.  When *name_list* is given, both
       items are the name found at *name_index*; otherwise the value is
       the index itself and the second item is its repr().
    """
    if name_list is None:
        return name_index, repr(name_index)
    name = name_list[name_index]
    # Names are displayed as-is, without quotes.
    return name, name
| 309 | |
| 310 | |
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                      cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *linestarts* maps instruction offsets to source line numbers;
    *line_offset* is added to every line number taken from it.
    """
    # findlabels() returns a list; a set makes the per-instruction
    # jump-target membership test constant time.
    labels = set(findlabels(code))
    starts_line = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = offset in labels
        argval = None
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative jumps are measured from the next instruction.
                argval = offset + 2 + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op == FORMAT_VALUE:
                # Low two bits select the converter, bit 2 flags a format spec.
                argval, argrepr = FORMAT_VALUE_CONVERTERS[arg & 0x3]
                argval = (argval, bool(arg & 0x4))
                if argval[1]:
                    if argrepr:
                        argrepr += ', '
                    argrepr += 'with format'
            elif op == MAKE_FUNCTION:
                # One flag name per bit set in the argument.
                argrepr = ', '.join(s for i, s in enumerate(MAKE_FUNCTION_FLAGS)
                                    if arg & (1<<i))
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)
| 364 | |
def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    *lasti* is the offset of the instruction to flag as current (the
    default of -1 matches no instruction); output goes to *file*.
    """
    closure_names = co.co_cellvars + co.co_freevars
    line_table = dict(findlinestarts(co))
    _disassemble_bytes(
        co.co_code,
        lasti,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        closure_names,
        line_table,
        file=file,
    )
Guido van Rossum | 217a5fa | 1990-12-26 15:40:07 +0000 | [diff] [blame] | 371 | |
def _disassemble_recursive(co, *, file=None, depth=None):
    """Disassemble *co*, then the code objects found among its constants.

    *depth* limits how many levels of nested code objects are followed;
    None means no limit, and a value of 0 or less stops after *co*.
    """
    disassemble(co, file=file)
    if depth is not None:
        if depth <= 0:
            return
        depth -= 1
    nested_code = (const for const in co.co_consts
                   if hasattr(const, 'co_code'))
    for inner in nested_code:
        print(file=file)
        print("Disassembly of %r:" % (inner,), file=file)
        _disassemble_recursive(inner, file=file, depth=depth)
| 382 | |
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print a disassembly of the raw bytecode *code* to *file*.

    *lasti* is the offset of the instruction to flag as current.
    *varnames*, *names*, *constants* and *cells* are used, when given,
    to resolve instruction arguments.  *linestarts* maps offsets to
    source line numbers and *line_offset* is added to each of them.
    """
    # Omit the line number column entirely if we have no line number info.
    # An empty mapping counts as "no info" too: taking max() of its values
    # would raise ValueError.
    show_lineno = bool(linestarts)
    if show_lineno:
        maxlineno = max(linestarts.values()) + line_offset
        # Widen the column only when the default width is too small.
        if maxlineno >= 1000:
            lineno_width = len(str(maxlineno))
        else:
            lineno_width = 3
    else:
        lineno_width = 0
    maxoffset = len(code) - 2
    if maxoffset >= 10000:
        offset_width = len(str(maxoffset))
    else:
        offset_width = 4
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts,
                                         line_offset=line_offset):
        # Separate source lines with a blank line, except before the first.
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr, offset_width),
              file=file)
Skip Montanaro | 19c6ba3 | 2003-02-27 21:29:27 +0000 | [diff] [blame] | 412 | |
def _disassemble_str(source, **kwargs):
    """Compile the source string, then disassemble the code object.

    Keyword arguments are passed through to the recursive disassembler.
    """
    compiled = _try_compile(source, '<dis>')
    _disassemble_recursive(compiled, **kwargs)
Nick Coghlan | 5c8b54e | 2010-07-03 07:36:51 +0000 | [diff] [blame] | 416 | |
Tim Peters | 88869f9 | 2001-01-14 23:36:06 +0000 | [diff] [blame] | 417 | disco = disassemble # XXX For backwards compatibility |
Guido van Rossum | bd30795 | 1997-01-17 20:05:04 +0000 | [diff] [blame] | 418 | |
def _unpack_opargs(code):
    """Yield an (offset, opcode, arg) triple for each two-byte unit of *code*.

    *arg* is None for opcodes below HAVE_ARGUMENT.  Otherwise it is the
    argument byte combined with any value carried over from preceding
    EXTENDED_ARG instructions.
    """
    carried = 0
    for position in range(0, len(code), 2):
        opcode = code[position]
        if opcode < HAVE_ARGUMENT:
            # No argument; the carried value is deliberately left as is.
            yield (position, opcode, None)
            continue
        operand = code[position + 1] | carried
        # EXTENDED_ARG contributes the high bits of the next argument.
        carried = (operand << 8) if opcode == EXTENDED_ARG else 0
        yield (position, opcode, operand)
Serhiy Storchaka | 02d9f5e | 2016-05-08 23:43:50 +0300 | [diff] [blame] | 429 | |
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, each listed once, in the order in which
    the jumps that target them appear in *code*.

    """
    labels = []
    # Mirrors `labels` so the duplicate check is O(1) instead of a list
    # scan for every jump instruction.
    seen = set()
    for offset, op, arg in _unpack_opargs(code):
        if arg is None:
            continue
        if op in hasjrel:
            # Relative jumps are measured from the next instruction.
            label = offset + 2 + arg
        elif op in hasjabs:
            label = arg
        else:
            continue
        if label not in seen:
            seen.add(label)
            labels.append(label)
    return labels
Guido van Rossum | 217a5fa | 1990-12-26 15:40:07 +0000 | [diff] [blame] | 448 | |
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    *code* must provide co_lnotab (alternating byte and line increments)
    and co_firstlineno.
    """
    # Pulling twice from one iterator pairs up consecutive entries:
    # (byte increment, line increment).
    entries = iter(code.co_lnotab)
    reported_line = None
    current_line = code.co_firstlineno
    position = 0
    for byte_step, line_step in zip(entries, entries):
        if byte_step:
            # The address is about to advance: report the line reached so
            # far unless it was already reported.
            if current_line != reported_line:
                yield (position, current_line)
                reported_line = current_line
            position += byte_step
        if line_step >= 0x80:
            # line increments are 8-bit signed integers
            line_step -= 0x100
        current_line += line_step
    if current_line != reported_line:
        yield (position, current_line)
Guido van Rossum | 1fdae12 | 2000-02-04 17:47:55 +0000 | [diff] [blame] | 473 | |
class Bytecode:
    """Wrapper giving access to the bytecode operations of some code.

    Create one from a function, method, other compiled object, string of
    source code, or a code object (as returned by compile()).

    Iterating over an instance produces the bytecode operations as
    Instruction instances.
    """
    def __init__(self, x, *, first_line=None, current_offset=None):
        code = _get_code_object(x)
        self.codeobj = code
        if first_line is not None:
            # Caller wants line numbers reported relative to first_line.
            self.first_line = first_line
            self._line_offset = first_line - code.co_firstlineno
        else:
            self.first_line = code.co_firstlineno
            self._line_offset = 0
        self._cell_names = code.co_cellvars + code.co_freevars
        self._linestarts = dict(findlinestarts(code))
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        code = self.codeobj
        return _get_instructions_bytes(
            code.co_code, code.co_varnames, code.co_names, code.co_consts,
            self._cell_names, self._linestarts,
            line_offset=self._line_offset)

    def __repr__(self):
        class_name = self.__class__.__name__
        return "{}({!r})".format(class_name, self._original_object)

    @classmethod
    def from_traceback(cls, tb):
        """ Build a Bytecode for the last frame in the given traceback """
        # Follow the tb_next chain to its final entry.
        innermost = tb
        while innermost.tb_next:
            innermost = innermost.tb_next
        return cls(innermost.tb_frame.f_code,
                   current_offset=innermost.tb_lasti)

    def info(self):
        """Return a formatted description of the wrapped code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return the bytecode operations rendered as formatted text."""
        code = self.codeobj
        # -1 is passed as lasti when no current offset was recorded.
        lasti = -1 if self.current_offset is None else self.current_offset
        with io.StringIO() as buffer:
            _disassemble_bytes(code.co_code,
                               varnames=code.co_varnames,
                               names=code.co_names,
                               constants=code.co_consts,
                               cells=self._cell_names,
                               linestarts=self._linestarts,
                               line_offset=self._line_offset,
                               file=buffer,
                               lasti=lasti)
            return buffer.getvalue()
Nick Coghlan | b39fd0c | 2013-05-06 23:59:20 +1000 | [diff] [blame] | 533 | |
| 534 | |
def _test():
    """Simple test program to disassemble a file.

    Reads Python source from the file named on the command line (or from
    standard input when the argument is omitted or is '-'), compiles it in
    "exec" mode and passes the resulting code object to dis().
    """
    import argparse

    parser = argparse.ArgumentParser()
    # Read the source as bytes so that compile() itself determines the
    # encoding (BOM or PEP 263 coding cookie, UTF-8 otherwise) instead of
    # the text being decoded with the locale's preferred encoding.
    parser.add_argument('infile', type=argparse.FileType('rb'), nargs='?',
                        default='-')
    args = parser.parse_args()
    with args.infile as infile:
        source = infile.read()
    code = compile(source, args.infile.name, "exec")
    dis(code)
Guido van Rossum | 1fdae12 | 2000-02-04 17:47:55 +0000 | [diff] [blame] | 546 | |
# Run the command-line test program only when executed as a script.
if __name__ == "__main__":
    _test()