Kostya Serebryany | 019b76f | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| 3 | # |
| 4 | # The LLVM Compiler Infrastructure |
| 5 | # |
| 6 | # This file is distributed under the University of Illinois Open Source |
| 7 | # License. See LICENSE.TXT for details. |
| 8 | # |
| 9 | #===------------------------------------------------------------------------===# |
| 10 | import os |
| 11 | import re |
| 12 | import sys |
| 13 | import string |
| 14 | import subprocess |
| 15 | |
# Cache of running symbolizer subprocesses, keyed by the path of the binary
# they were started for.
pipes = {}
# Cache of Mach-O file types detected via otool, keyed by binary path.
filetypes = {}
# When True, symbolize_atos prints the atos command line it is about to run.
DEBUG=False
Kostya Serebryany | 019b76f | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 19 | |
def patch_address(frameno, addr_s):
    """Return the address that should be symbolized for a stack frame.

    Subtracts 1 or 2 from the top frame's address.
    Top frame is normally the return address from asan_report*
    call, which is not expected to return at all. Because of that, this
    address often belongs to the next source code line, or even to a different
    function.

    Args:
      frameno: frame number as the decimal string captured from "#N".
      addr_s: hexadecimal address string, e.g. "0x11fe45".

    Returns:
      For frame '0', the adjusted address formatted by hex(); for any other
      frame, addr_s unchanged.
    """
    if frameno != '0':
        return addr_s
    addr = int(addr_s, 16)
    if os.uname()[4].startswith('arm'):
        # Cancel the Thumb bit
        addr &= ~1
    addr -= 1
    # Python 2's hex() appends 'L' to long integers (32-bit builds, or very
    # high addresses). That suffix would be passed verbatim to the external
    # symbolizer and would also skew length-based checks on the result.
    return hex(addr).rstrip('L')
| 34 | |
Alexander Potapenko | 02a7162 | 2012-01-26 17:06:50 +0000 | [diff] [blame^] | 35 | |
def fix_filename(file_name):
    """Shorten a "file:line" location for display.

    Applies, in order:
      * for every command-line argument, removal of everything up to and
        including the last match of that argument (the argument is used as
        a regular expression fragment);
      * replacement of locations inside asan_*.cc files by "_asan_rtl_";
      * replacement of "crtstuff.c:0" locations by "???:0".

    Returns the rewritten string.
    """
    rewrites = [(".*" + path_to_cut, "") for path_to_cut in sys.argv[1:]]
    rewrites.append((".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_"))
    rewrites.append((".*crtstuff.c:0", "???:0"))
    for pattern, replacement in rewrites:
        file_name = re.sub(pattern, replacement, file_name)
    return file_name
| 42 | |
| 43 | |
Kostya Serebryany | 019b76f | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 44 | # TODO(glider): need some refactoring here |
def symbolize_addr2line(line):
    """Symbolize one line of a report using addr2line and print the result.

    A stack-frame line such as
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    is printed as "<frame number and address> in <function> <file:line>".
    Any other line is printed with trailing whitespace removed.

    One addr2line process per binary is started on demand and kept in the
    module-level `pipes` dict so later frames from the same binary reuse it.

    Args:
      line: one line of input text.
    """
    match = re.match(
        r'^( *#([0-9]+) *0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
    if not match:
        print(line.rstrip())
        return

    frameno = match.group(2)
    binary = match.group(3)
    addr = patch_address(frameno, match.group(4))
    if binary not in pipes:
        # universal_newlines=True gives text-mode pipes on Python 3 and is
        # harmless on Python 2.
        pipes[binary] = subprocess.Popen(
            ["addr2line", "-f", "-e", binary],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            universal_newlines=True)
    p = pipes[binary]
    try:
        p.stdin.write(addr + "\n")
        # The pipe may be buffered; addr2line must see the query before we
        # block waiting for its answer.
        p.stdin.flush()
        # With -f, addr2line answers with two lines: function, then file:line.
        function_name = p.stdout.readline().rstrip()
        file_name = p.stdout.readline().rstrip()
    except (IOError, OSError, ValueError):
        # Best effort: a dead or closed pipe yields an empty symbolization
        # instead of aborting the whole report.
        function_name = ""
        file_name = ""
    file_name = fix_filename(file_name)

    print("%s in %s %s" % (match.group(1), function_name, file_name))
| 69 | |
Alexander Potapenko | 02a7162 | 2012-01-26 17:06:50 +0000 | [diff] [blame^] | 70 | |
def get_macho_filetype(binary):
    """Return the Mach-O file type of `binary` as reported by `otool -Vh`.

    The answer is cached in the module-level `filetypes` dict, so otool runs
    at most once per binary.

    Args:
      binary: path of the Mach-O file to inspect.

    Returns:
      "DYLIB" or "EXECUTE" when that word appears in the otool output
      ("EXECUTE" wins if both appear), otherwise None.
    """
    if binary not in filetypes:
        otool_pipe = subprocess.Popen(["otool", "-Vh", binary],
                                      stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      universal_newlines=True)
        # communicate() closes stdin, drains stdout and waits for otool, so
        # no zombie process is left behind.
        otool_output = otool_pipe.communicate()[0]
        detected = None
        for t in ["DYLIB", "EXECUTE"]:
            if t in otool_output:
                detected = t
        # Cache None as well: an unrecognized type used to raise KeyError on
        # the return below, and re-running otool would not change the answer.
        filetypes[binary] = detected
    return filetypes[binary]
| 81 | |
| 82 | |
def symbolize_atos(line):
    """Symbolize one line of a report using atos and print the result.

    A stack-frame line such as
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    is printed as "<frame prefix><address> in <function> <file:line>" when
    atos returns a well-formed answer, or as
    "<frame prefix><address> in <raw atos output>" otherwise.
    Any other line is printed with trailing whitespace removed.

    A fresh atos process is started for every frame.

    Args:
      line: one line of input text.
    """
    #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    frame = re.match(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
    if not frame:
        print(line.rstrip())
        return

    prefix = frame.group(1)
    frameno = frame.group(2)
    orig_addr = frame.group(3)
    binary = frame.group(4)
    offset = frame.group(5)
    addr = patch_address(frameno, orig_addr)
    is_dylib = get_macho_filetype(binary) == "DYLIB"

    # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
    arch = "x86_64" if len(addr) > 10 else "i386"

    if is_dylib:
        # Dylibs are queried by offset (see below), so load them at zero.
        load_addr = "0x0"
    else:
        # rstrip('L'): Python 2's hex() marks long integers with a suffix
        # that must not reach the atos command line.
        load_addr = hex(int(orig_addr, 16) - int(offset, 16)).rstrip('L')
    if DEBUG:
        print("atos -o %s -arch %s -l %s" % (binary, arch, load_addr))

    # TODO(glider): it's more efficient to make a batch atos run for each binary.
    atos = subprocess.Popen(
        ["atos", "-o", binary, "-arch", arch, "-l", load_addr],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE,
        universal_newlines=True)
    query = offset if is_dylib else addr
    # communicate() sends the query, closes stdin, collects the answer and
    # waits for atos to exit, so the child is reaped.
    atos_output = atos.communicate(query + "\n")[0]
    atos_line = atos_output.split("\n", 1)[0].rstrip()

    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    parsed = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
    if parsed:
        # Drop the argument list from the function signature.
        function_name = re.sub(r"\(.*?\)", "", parsed.group(1))
        file_name = fix_filename(parsed.group(3))
        print("%s%s in %s %s" % (prefix, addr, function_name, file_name))
    else:
        print("%s%s in %s" % (prefix, addr, atos_line))
| 132 | |
# Filter stdin to stdout, symbolizing stack-frame lines with the tool that
# matches the host OS. The symbolizer is chosen once, not per input line.
system = os.uname()[0]
_symbolizers = {'Linux': symbolize_addr2line, 'Darwin': symbolize_atos}
if system in _symbolizers:
    _symbolize = _symbolizers[system]
    for line in sys.stdin:
        _symbolize(line)
else:
    # Two spaces after the colon reproduce the historical output of
    # `print 'Unknown system: ', system`.
    print('Unknown system:  %s' % system)