| #!/usr/bin/env python |
| #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| # |
| # The LLVM Compiler Infrastructure |
| # |
| # This file is distributed under the University of Illinois Open Source |
| # License. See LICENSE.TXT for details. |
| # |
| #===------------------------------------------------------------------------===# |
| import bisect |
| import getopt |
| import os |
| import re |
| import subprocess |
| import sys |
| |
| llvm_symbolizer = None |
| symbolizers = {} |
| DEBUG = False |
| demangle = False; |
| |
| |
| # FIXME: merge the code that calls fix_filename(). |
| def fix_filename(file_name): |
| for path_to_cut in sys.argv[1:]: |
| file_name = re.sub('.*' + path_to_cut, '', file_name) |
| file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name) |
| file_name = re.sub('.*crtstuff.c:0', '???:0', file_name) |
| return file_name |
| |
| |
| class Symbolizer(object): |
| def __init__(self): |
| pass |
| |
| def symbolize(self, addr, binary, offset): |
| """Symbolize the given address (pair of binary and offset). |
| |
| Overriden in subclasses. |
| Args: |
| addr: virtual address of an instruction. |
| binary: path to executable/shared object containing this instruction. |
| offset: instruction offset in the @binary. |
| Returns: |
| list of strings (one string for each inlined frame) describing |
| the code locations for this instruction (that is, function name, file |
| name, line and column numbers). |
| """ |
| return None |
| |
| |
| class LLVMSymbolizer(Symbolizer): |
| def __init__(self, symbolizer_path): |
| super(LLVMSymbolizer, self).__init__() |
| self.symbolizer_path = symbolizer_path |
| self.pipe = self.open_llvm_symbolizer() |
| |
| def open_llvm_symbolizer(self): |
| if not os.path.exists(self.symbolizer_path): |
| return None |
| cmd = [self.symbolizer_path, |
| '--use-symbol-table=true', |
| '--demangle=%s' % demangle, |
| '--functions=true', |
| '--inlining=true'] |
| if DEBUG: |
| print ' '.join(cmd) |
| return subprocess.Popen(cmd, stdin=subprocess.PIPE, |
| stdout=subprocess.PIPE) |
| |
| def symbolize(self, addr, binary, offset): |
| """Overrides Symbolizer.symbolize.""" |
| if not self.pipe: |
| return None |
| result = [] |
| try: |
| symbolizer_input = '%s %s' % (binary, offset) |
| if DEBUG: |
| print symbolizer_input |
| print >> self.pipe.stdin, symbolizer_input |
| while True: |
| function_name = self.pipe.stdout.readline().rstrip() |
| if not function_name: |
| break |
| file_name = self.pipe.stdout.readline().rstrip() |
| file_name = fix_filename(file_name) |
| if (not function_name.startswith('??') and |
| not file_name.startswith('??')): |
| # Append only valid frames. |
| result.append('%s in %s %s' % (addr, function_name, |
| file_name)) |
| except Exception: |
| result = [] |
| if not result: |
| result = None |
| return result |
| |
| |
| def LLVMSymbolizerFactory(system): |
| symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH') |
| if not symbolizer_path: |
| # Assume llvm-symbolizer is in PATH. |
| symbolizer_path = 'llvm-symbolizer' |
| return LLVMSymbolizer(symbolizer_path) |
| |
| |
| class Addr2LineSymbolizer(Symbolizer): |
| def __init__(self, binary): |
| super(Addr2LineSymbolizer, self).__init__() |
| self.binary = binary |
| self.pipe = self.open_addr2line() |
| |
| def open_addr2line(self): |
| cmd = ['addr2line', '-f'] |
| if demangle: |
| cmd += ['--demangle'] |
| cmd += ['-e', self.binary] |
| if DEBUG: |
| print ' '.join(cmd) |
| return subprocess.Popen(cmd, |
| stdin=subprocess.PIPE, stdout=subprocess.PIPE) |
| |
| def symbolize(self, addr, binary, offset): |
| """Overrides Symbolizer.symbolize.""" |
| if self.binary != binary: |
| return None |
| try: |
| print >> self.pipe.stdin, offset |
| function_name = self.pipe.stdout.readline().rstrip() |
| file_name = self.pipe.stdout.readline().rstrip() |
| except Exception: |
| function_name = '' |
| file_name = '' |
| file_name = fix_filename(file_name) |
| return ['%s in %s %s' % (addr, function_name, file_name)] |
| |
| |
| class DarwinSymbolizer(Symbolizer): |
| def __init__(self, addr, binary): |
| super(DarwinSymbolizer, self).__init__() |
| self.binary = binary |
| # Guess which arch we're running. 10 = len('0x') + 8 hex digits. |
| if len(addr) > 10: |
| self.arch = 'x86_64' |
| else: |
| self.arch = 'i386' |
| self.pipe = None |
| |
| def write_addr_to_pipe(self, offset): |
| print >> self.pipe.stdin, '0x%x' % int(offset, 16) |
| |
| def open_atos(self): |
| if DEBUG: |
| print 'atos -o %s -arch %s' % (self.binary, self.arch) |
| cmdline = ['atos', '-o', self.binary, '-arch', self.arch] |
| self.pipe = subprocess.Popen(cmdline, |
| stdin=subprocess.PIPE, |
| stdout=subprocess.PIPE, |
| stderr=subprocess.PIPE) |
| |
| def symbolize(self, addr, binary, offset): |
| """Overrides Symbolizer.symbolize.""" |
| if self.binary != binary: |
| return None |
| self.open_atos() |
| self.write_addr_to_pipe(offset) |
| self.pipe.stdin.close() |
| atos_line = self.pipe.stdout.readline().rstrip() |
| # A well-formed atos response looks like this: |
| # foo(type1, type2) (in object.name) (filename.cc:80) |
| match = re.match('^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line) |
| if DEBUG: |
| print 'atos_line: ', atos_line |
| if match: |
| function_name = match.group(1) |
| function_name = re.sub('\(.*?\)', '', function_name) |
| file_name = fix_filename(match.group(3)) |
| return ['%s in %s %s' % (addr, function_name, file_name)] |
| else: |
| return ['%s in %s' % (addr, atos_line)] |
| |
| |
| # Chain several symbolizers so that if one symbolizer fails, we fall back |
| # to the next symbolizer in chain. |
| class ChainSymbolizer(Symbolizer): |
| def __init__(self, symbolizer_list): |
| super(ChainSymbolizer, self).__init__() |
| self.symbolizer_list = symbolizer_list |
| |
| def symbolize(self, addr, binary, offset): |
| """Overrides Symbolizer.symbolize.""" |
| for symbolizer in self.symbolizer_list: |
| if symbolizer: |
| result = symbolizer.symbolize(addr, binary, offset) |
| if result: |
| return result |
| return None |
| |
| def append_symbolizer(self, symbolizer): |
| self.symbolizer_list.append(symbolizer) |
| |
| |
| def BreakpadSymbolizerFactory(binary): |
| suffix = os.getenv('BREAKPAD_SUFFIX') |
| if suffix: |
| filename = binary + suffix |
| if os.access(filename, os.F_OK): |
| return BreakpadSymbolizer(filename) |
| return None |
| |
| |
| def SystemSymbolizerFactory(system, addr, binary): |
| if system == 'Darwin': |
| return DarwinSymbolizer(addr, binary) |
| elif system == 'Linux': |
| return Addr2LineSymbolizer(binary) |
| |
| |
| class BreakpadSymbolizer(Symbolizer): |
| def __init__(self, filename): |
| super(BreakpadSymbolizer, self).__init__() |
| self.filename = filename |
| lines = file(filename).readlines() |
| self.files = [] |
| self.symbols = {} |
| self.address_list = [] |
| self.addresses = {} |
| # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t |
| fragments = lines[0].rstrip().split() |
| self.arch = fragments[2] |
| self.debug_id = fragments[3] |
| self.binary = ' '.join(fragments[4:]) |
| self.parse_lines(lines[1:]) |
| |
| def parse_lines(self, lines): |
| cur_function_addr = '' |
| for line in lines: |
| fragments = line.split() |
| if fragments[0] == 'FILE': |
| assert int(fragments[1]) == len(self.files) |
| self.files.append(' '.join(fragments[2:])) |
| elif fragments[0] == 'PUBLIC': |
| self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:]) |
| elif fragments[0] in ['CFI', 'STACK']: |
| pass |
| elif fragments[0] == 'FUNC': |
| cur_function_addr = int(fragments[1], 16) |
| if not cur_function_addr in self.symbols.keys(): |
| self.symbols[cur_function_addr] = ' '.join(fragments[4:]) |
| else: |
| # Line starting with an address. |
| addr = int(fragments[0], 16) |
| self.address_list.append(addr) |
| # Tuple of symbol address, size, line, file number. |
| self.addresses[addr] = (cur_function_addr, |
| int(fragments[1], 16), |
| int(fragments[2]), |
| int(fragments[3])) |
| self.address_list.sort() |
| |
| def get_sym_file_line(self, addr): |
| key = None |
| if addr in self.addresses.keys(): |
| key = addr |
| else: |
| index = bisect.bisect_left(self.address_list, addr) |
| if index == 0: |
| return None |
| else: |
| key = self.address_list[index - 1] |
| sym_id, size, line_no, file_no = self.addresses[key] |
| symbol = self.symbols[sym_id] |
| filename = self.files[file_no] |
| if addr < key + size: |
| return symbol, filename, line_no |
| else: |
| return None |
| |
| def symbolize(self, addr, binary, offset): |
| if self.binary != binary: |
| return None |
| res = self.get_sym_file_line(int(offset, 16)) |
| if res: |
| function_name, file_name, line_no = res |
| result = ['%s in %s %s:%d' % ( |
| addr, function_name, file_name, line_no)] |
| print result |
| return result |
| else: |
| return None |
| |
| |
| class SymbolizationLoop(object): |
| def __init__(self, binary_name_filter=None): |
| # Used by clients who may want to supply a different binary name. |
| # E.g. in Chrome several binaries may share a single .dSYM. |
| self.binary_name_filter = binary_name_filter |
| self.system = os.uname()[0] |
| if self.system in ['Linux', 'Darwin']: |
| self.llvm_symbolizer = LLVMSymbolizerFactory(self.system) |
| else: |
| raise Exception('Unknown system') |
| |
| def symbolize_address(self, addr, binary, offset): |
| # Use the chain of symbolizers: |
| # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos |
| # (fall back to next symbolizer if the previous one fails). |
| if not binary in symbolizers: |
| symbolizers[binary] = ChainSymbolizer( |
| [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer]) |
| result = symbolizers[binary].symbolize(addr, binary, offset) |
| if result is None: |
| # Initialize system symbolizer only if other symbolizers failed. |
| symbolizers[binary].append_symbolizer( |
| SystemSymbolizerFactory(self.system, addr, binary)) |
| result = symbolizers[binary].symbolize(addr, binary, offset) |
| # The system symbolizer must produce some result. |
| assert result |
| return result |
| |
| def print_symbolized_lines(self, symbolized_lines): |
| if not symbolized_lines: |
| print self.current_line |
| else: |
| for symbolized_frame in symbolized_lines: |
| print ' #' + str(self.frame_no) + ' ' + symbolized_frame.rstrip() |
| self.frame_no += 1 |
| |
| def process_stdin(self): |
| self.frame_no = 0 |
| while True: |
| line = sys.stdin.readline() |
| if not line: |
| break |
| self.current_line = line.rstrip() |
| #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45) |
| stack_trace_line_format = ( |
| '^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)') |
| match = re.match(stack_trace_line_format, line) |
| if not match: |
| print self.current_line |
| continue |
| if DEBUG: |
| print line |
| _, frameno_str, addr, binary, offset = match.groups() |
| if frameno_str == '0': |
| # Assume that frame #0 is the first frame of new stack trace. |
| self.frame_no = 0 |
| original_binary = binary |
| if self.binary_name_filter: |
| binary = self.binary_name_filter(binary) |
| symbolized_line = self.symbolize_address(addr, binary, offset) |
| if not symbolized_line: |
| if original_binary != binary: |
| symbolized_line = self.symbolize_address(addr, binary, offset) |
| self.print_symbolized_lines(symbolized_line) |
| |
| |
| if __name__ == '__main__': |
| opts, args = getopt.getopt(sys.argv[1:], "d", ["demangle"]) |
| for o, a in opts: |
| if o in ("-d", "--demangle"): |
| demangle = True; |
| loop = SymbolizationLoop() |
| loop.process_stdin() |