| #!/usr/bin/python |
| |
| """Disassemble the code stored in a tombstone. |
| |
| The classes in this module use an interface, ProcessLine, so that they can be |
| chained together to do arbitrary processing. The current classes support |
| disassembling the bytes embedded in tombstones and printing output to stdout. |
| """ |
| |
| |
| import re |
| import subprocess |
| import sys |
| import tempfile |
| import architecture |
| |
| |
# Assembly written at the top of every scratch file by ProcessCodeBlock.
# It declares the global function symbol _start, so the .byte directives
# emitted after it become the body of that symbol.
STANDARD_PROLOGUE = """
.type _start, %function
.globl _start
_start:
"""


# Prologue used when the dumped code is Thumb: the standard prologue followed
# by directives that switch the assembler to the 16-bit Thumb instruction set
# (.code 16) and mark thumb_start as a Thumb function (.thumb_func).
THUMB_PROLOGUE = STANDARD_PROLOGUE + """
.code 16
.thumb_func
.type thumb_start, %function
thumb_start:
"""
| |
| |
def Disassemble(line_generator):
    """Pass a tombstone through, replacing dumped code with its disassembly.

    Args:
        line_generator: any iterable of tombstone lines (an open file, a
            generator, a list, ...). It is read in one forward pass that is
            shared with ProcessRegisterList and ProcessCodeBlock.

    Yields:
        The input lines in order, except for the lines that ProcessCodeBlock
        consumes; in their place come the lines ProcessCodeBlock yields.
    """
    # The header scan, the main loop and both helpers must all advance the
    # same iterator. Without this call a re-iterable argument such as a list
    # would be restarted from its first line by every new for loop.
    line_generator = iter(line_generator)
    abi_line = re.compile(r"(ABI: '(.*)')")
    abi = None
    tools = None
    # Process global headers
    for line in line_generator:
        yield line
        abi_header = abi_line.search(line)
        if abi_header:
            abi = abi_header.group(2)
            # Look up the tools here so we don't do a lookup for each code
            # block.
            tools = architecture.Architecture(abi)
            break
    # The rest of the file consists of:
    #   o Lines that should pass through unchanged
    #   o Blocks of register values, which follow a 'pid: ...' line and end
    #     with 'backtrace:' line
    #   o Blocks of code represented as words, which start with
    #     'code around ...' and end with a line that doesn't look like a list
    #     of words.
    #
    # The only constraint on the ordering of these blocks is that the register
    # values must come before the first code block.
    #
    # It's easiest to nest register processing in the codeblock search loop.
    register_list_re = re.compile('^pid: ')
    # NOTE(review): the '^' anchor only applies to the 'code around'
    # alternative; 'memory near pc' is matched anywhere in the line.
    codeblock_re = re.compile('^code around ([a-z0-9]+)|memory near (pc)')
    register_text = {}
    for line in line_generator:
        yield line
        if register_list_re.search(line):
            # A new register dump replaces whatever was collected before.
            register_text = {}
            for output in ProcessRegisterList(line_generator, register_text):
                yield output
        code_match = codeblock_re.search(line)
        if code_match:
            # Only one alternative can match, so only one group is set; the
            # other is replaced by '' and the join returns the register name.
            code_reg = ''.join(code_match.groups(''))
            for output in ProcessCodeBlock(
                    abi, tools, code_reg, register_text, line_generator):
                yield output
| |
| |
def ProcessRegisterList(line_generator, rval):
    """Echo a register dump while recording its name/value pairs.

    Args:
        line_generator: iterator of tombstone lines, positioned just after
            the line that introduced the register dump.
        rval: dict that is filled in place, mapping each register name to
            the text of its value.

    Yields:
        Every line read, up to and including the 'backtrace:' line.

    Raises:
        AssertionError: an indented line holds an odd number of words.
    """
    for text in line_generator:
        yield text
        if text.startswith('backtrace:'):
            # The backtrace header closes the register dump.
            break
        if not text.startswith(' '):
            # Only indented lines carry registers; the rest is echoed only.
            continue
        # An indented line alternates name, value, name, value, ...
        tokens = text.split()
        assert len(tokens) % 2 == 0
        rval.update(zip(tokens[0::2], tokens[1::2]))
| |
| |
def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
    """Replace one memory dump from a tombstone with its disassembly.

    Dump lines are read from line_generator until a line of unexpected length
    shows up (that line is consumed and not yielded). The bytes are written
    out as assembler .byte directives, assembled, linked at the address of
    the dump and disassembled again.

    Args:
        abi: ABI name. 'arm' enables the Thumb check on the cpsr register and
            any name starting with 'arm' enables the mapping symbol
            workaround.
        tools: object providing WordToBytes(word), which returns the bytes of
            a dumped word as hex text, and Assemble(args), Link(args) and
            Disassemble(args), which return the command lines to run.
        register_name: key into register_text for the register whose address
            receives the '--->' marker.
        register_text: dict mapping register names to hexadecimal text.
        line_generator: iterator of tombstone lines, positioned on the first
            line of the dump.

    Yields:
        The disassembly lines, each prefixed with '--->' (the instruction at
        the register's address) or a space, followed by one newline-only
        line. If no dump line was found only that last line is yielded and
        no external tool is run.

    Raises:
        KeyError: register_name, or 'cpsr' when abi is 'arm', is missing from
            register_text.
        AssertionError: the register text is neither 8 nor 16 characters
            long, or a dump line does not start at the expected address.
        subprocess.CalledProcessError: the assembler, sed or the linker
            exited with a non-zero status.
    """
    program_counter = register_text[register_name]
    program_counter_val = int(program_counter, 16)
    # Handle the different dump layouts that we've observed. The width of the
    # register text selects the accepted raw line lengths and the number of
    # words per line.
    if len(program_counter) == 8:
        block_line_len = [67]
        block_num_words = 4
    else:
        assert len(program_counter) == 16
        block_line_len = [57, 73]
        block_num_words = 2

    # Retains the hexadecimal text for the start of the block
    start_address = None
    # Maintains a numeric counter for the address of the current byte
    current_address = None
    # Assembly source for the dumped bytes, one label or directive per entry.
    assembly = []
    for line in line_generator:
        # Be conservative and stop interpreting if the line length is wrong.
        # We can't count words because spaces can appear in the text
        # representation of the memory.
        if len(line) not in block_line_len:
            break
        words = line.split()
        # Double check the address at the start of each line
        if current_address is None:
            start_address = words[0]
            current_address = int(start_address, 16)
        else:
            assert current_address == int(words[0], 16)
        for word in words[1:block_num_words + 1]:
            # Handle byte swapping
            for byte in tools.WordToBytes(word):
                # Emit a label at the desired program counter.
                # This will cause the disassembler to resynchronize at this
                # point, allowing us to position the arrow and also ensuring
                # that we decode the instruction properly.
                if current_address == program_counter_val:
                    assembly.append('program_counter_was_here:\n')
                assembly.append(' .byte 0x%s\n' % byte)
                current_address += 1

    if start_address is None:
        # No dump line followed the header, so there is nothing to assemble
        # and no address to link at. Emit the usual closing line and stop
        # before any temporary file or external tool is involved.
        yield '\n'
        return

    # The with blocks delete the temporary files even when a tool fails or
    # the consumer abandons this generator early.
    with tempfile.NamedTemporaryFile(suffix='.o') as linked_file:
        with tempfile.NamedTemporaryFile(suffix='.o') as object_file:
            # Text mode so that str can be written on Python 2 and 3 alike.
            with tempfile.NamedTemporaryFile(
                    mode='w', suffix='.s') as scratch_file:
                # ARM code comes in two flavors: arm and thumb. Figure out
                # the one to use by peeking in the cpsr.
                if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
                    scratch_file.write(THUMB_PROLOGUE)
                else:
                    scratch_file.write(STANDARD_PROLOGUE)
                scratch_file.writelines(assembly)
                # The assembler reads the file by name, so the buffered text
                # must be on disk before it runs.
                scratch_file.flush()
                subprocess.check_call(tools.Assemble(
                    ['-o', object_file.name, scratch_file.name]))

            # Work around ARM data tagging: rewrite the NUL-delimited symbol
            # name "$d" (0x24 0x64) as "$q" (0x24 0x71) in the object file.
            # Presumably this keeps the disassembler from treating the
            # .byte output as data -- TODO confirm.
            if abi.startswith('arm'):
                subprocess.check_call([
                    'sed', '-i', '-e',
                    "s/\\x00\\x24\\x64\\x00/\\x00\\x24\\x71\\x00/",
                    object_file.name])

            # Relocate the code to the address of the dump with the linker.
            subprocess.check_call(tools.Link([
                '-Ttext', '0x' + start_address,
                '-o', linked_file.name, object_file.name]))

        # universal_newlines makes the pipe yield text on Python 3 as well.
        disassembler = subprocess.Popen(
            tools.Disassemble(['-S', linked_file.name]),
            stdout=subprocess.PIPE, universal_newlines=True)
        try:
            # Skip some of the annoying assembler headers.
            emit = False
            start_pattern = start_address + ' '
            # objdump padding varies between 32 bit and 64 bit architectures.
            # The leading [ 0]* absorbs both space and zero padding, so the
            # address itself is formatted without a fixed width.
            arrow_pattern = re.compile('^[ 0]*%x:\t' % program_counter_val)
            for line in disassembler.stdout:
                emit = emit or line.startswith(start_pattern)
                if (emit and len(line) > 1
                        and 'program_counter_was_here' not in line):
                    if arrow_pattern.search(line):
                        yield '--->' + line
                    else:
                        yield ' ' + line
        finally:
            # Close the pipe before waiting so that a disassembler which is
            # still writing cannot block forever, then reap the process.
            disassembler.stdout.close()
            disassembler.wait()
    yield '\n'
| |
| |
def main(argv):
    """Disassemble every tombstone named on the command line to stdout.

    Args:
        argv: the full argument vector. argv[0] is skipped and every other
            entry is opened as a tombstone file.
    """
    for fn in argv[1:]:
        # The with block closes the tombstone even if disassembly fails.
        with open(fn, 'r') as tombstone:
            for line in Disassemble(tombstone):
                # Lines are written verbatim; they normally end with their
                # own newline already.
                sys.stdout.write(line)
| |
| |
# Run the tool only when this file is executed as a script, so that importing
# the module has no side effects.
if __name__ == '__main__':
    main(sys.argv)