Kostya Serebryany | 019b76f | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| 3 | # |
| 4 | # The LLVM Compiler Infrastructure |
| 5 | # |
| 6 | # This file is distributed under the University of Illinois Open Source |
| 7 | # License. See LICENSE.TXT for details. |
| 8 | # |
| 9 | #===------------------------------------------------------------------------===# |
Alexander Potapenko | be84ac8 | 2012-08-02 13:59:23 +0000 | [diff] [blame] | 10 | import bisect |
Kostya Serebryany | 019b76f | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 11 | import os |
| 12 | import re |
| 13 | import sys |
Kostya Serebryany | 019b76f | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 14 | import subprocess |
| 15 | |
# Symbolizer objects created so far, keyed by the binary path taken from the
# stack frame line (filled in by symbolize_line()).
symbolizers = {}
# NOTE(review): filetypes and vmaddrs are not referenced by any code visible
# in this file -- confirm whether they are still needed.
filetypes = {}
vmaddrs = {}
# When True, the symbolizers print the commands they run and the raw lines
# they process.
DEBUG = False
Kostya Serebryany | 019b76f | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 20 | |
Alexander Potapenko | 8aae955 | 2012-07-31 13:51:26 +0000 | [diff] [blame] | 21 | |
def fix_filename(file_name):
    """Shorten a "path:line" location string for display.

    Each command-line argument (sys.argv[1:]) is used as a regular
    expression: everything in file_name up to and including a match of it is
    removed.  After that, a location inside an asan_*.cc source file is
    replaced with "_asan_rtl_", and a "crtstuff.c:0" location with "???:0".

    Args:
      file_name: location string to shorten.

    Returns:
      The shortened location string.
    """
    for path_to_cut in sys.argv[1:]:
        # path_to_cut is inserted into the pattern as-is (not escaped), so
        # regex metacharacters in the argument keep their regex meaning.
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    # The dot in front of the file extension is escaped so that it only
    # matches a literal '.', not an arbitrary character.
    file_name = re.sub(r".*asan_[a-z_]*\.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
    return file_name
| 28 | |
| 29 | |
class Symbolizer(object):
    """Common base class of the symbolizers defined in this file.

    It holds no state of its own; the subclasses in this file each define
    symbolize(prefix, addr, offset).
    """

    def __init__(self):
        """Create the base object; there is nothing to initialize."""
| 33 | |
| 34 | |
class LinuxSymbolizer(Symbolizer):
    """Symbolizer that queries one addr2line process started for a binary."""

    def __init__(self, binary):
        super(LinuxSymbolizer, self).__init__()
        self.binary = binary
        # The process is started once here and reused by every symbolize().
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start "addr2line -f -e <binary>" with piped stdin and stdout.

        Returns:
          The subprocess.Popen object of the started process.
        """
        argv = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(argv))
        return subprocess.Popen(argv,
                                stdout=subprocess.PIPE,
                                stdin=subprocess.PIPE)

    def symbolize(self, prefix, addr, offset):
        """Build the symbolized text for one stack frame.

        Writes offset to addr2line and reads back two lines: the function
        name and the file location.  If anything in that exchange raises,
        both are left empty.

        Args:
          prefix: leading part of the frame line, copied to the result.
          addr: address string, copied to the result.
          offset: offset string sent to addr2line.

        Returns:
          "<prefix><addr> in <function> <location>".
        """
        try:
            self.pipe.stdin.write("%s\n" % offset)
            reply = self.pipe.stdout
            function_name = reply.readline().rstrip()
            file_name = reply.readline().rstrip()
        except Exception:
            # Best effort: fall back to empty names instead of failing.
            function_name, file_name = "", ""
        location = fix_filename(file_name)
        return "%s%s in %s %s" % (prefix, addr, function_name, location)
Alexander Potapenko | 02a7162 | 2012-01-26 17:06:50 +0000 | [diff] [blame] | 56 | |
| 57 | |
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that runs otool and atos on a binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        # Filled in lazily by get_binary_vmaddr(); None means "not read yet".
        self.vmaddr = None
        # Popen object of the most recently started atos process, if any.
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000

        Returns:
          The vmaddr of the __TEXT segment as an int, or 0 if none is found
          in the otool output.  The value is cached in self.vmaddr.
        """
        # Compare against None: 0 is a legitimate vmaddr and must count as
        # cached, otherwise otool would be run again on every call.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ["otool", "-l", self.binary]
        # universal_newlines gives text (str) output on Python 2 and 3 alike.
        pipe = subprocess.Popen(cmdline,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() closes stdin, reads all of stdout and waits for
        # otool to exit, so no pipe or child process is left behind.
        output, _ = pipe.communicate()
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                vmaddr = int(line.split(' ')[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send offset plus the binary's vmaddr to atos as one hex line.

        Args:
          offset: hexadecimal offset string.
        """
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start a new atos process for this binary; store it in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, prefix, addr, offset):
        """Build the symbolized text for one stack frame.

        A new atos process is started for every call.

        Args:
          prefix: leading part of the frame line, copied to the result.
          addr: address string, copied to the result.
          offset: hexadecimal offset string inside the binary.

        Returns:
          "<prefix><addr> in <function> <location>" when the first line of
          atos output has the expected form, otherwise
          "<prefix><addr> in <first atos output line>".
        """
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # communicate() closes stdin, drains both stdout and stderr and waits
        # for atos to exit, so the child and its pipes are cleaned up.
        output, _ = self.pipe.communicate()
        output_lines = output.splitlines()
        atos_line = output_lines[0].rstrip() if output_lines else ""
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if match:
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r"\(.*?\)", "", match.group(1))
            file_name = fix_filename(match.group(3))
            return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
        else:
            return "%s%s in %s" % (prefix, addr, atos_line)
Kostya Serebryany | 019b76f | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 126 | |
Alexander Potapenko | 8aae955 | 2012-07-31 13:51:26 +0000 | [diff] [blame] | 127 | |
Alexander Potapenko | be84ac8 | 2012-08-02 13:59:23 +0000 | [diff] [blame] | 128 | # Chain two symbolizers so that the second one is called if the first fails. |
class ChainSymbolizer(Symbolizer):
    """Pair of symbolizers in which the second is a fallback for the first."""

    def __init__(self, symbolizer1, symbolizer2):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer1, self.symbolizer2 = symbolizer1, symbolizer2

    def symbolize(self, prefix, addr, offset):
        """Return the first symbolizer's result, or the second's if it is None.

        The arguments are passed unchanged to the underlying symbolizers.
        """
        primary = self.symbolizer1.symbolize(prefix, addr, offset)
        if primary is not None:
            return primary
        return self.symbolizer2.symbolize(prefix, addr, offset)
| 139 | |
| 140 | |
def BreakpadSymbolizerFactory(addr, binary):
    """Create a BreakpadSymbolizer for binary if a symbol file is available.

    The symbol file name is binary followed by the value of the
    BREAKPAD_SUFFIX environment variable.

    Args:
      addr: not used by this function.
      binary: path of the binary being symbolized.

    Returns:
      A BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset or empty
      or the symbol file does not exist.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if not suffix:
        return None
    symbol_file = binary + suffix
    if not os.access(symbol_file, os.F_OK):
        return None
    return BreakpadSymbolizer(symbol_file)
| 148 | |
| 149 | |
def SystemSymbolizerFactory(system, addr, binary):
    """Create the symbolizer that matches the given operating system name.

    Args:
      system: 'Linux' or 'Darwin'.
      addr: address string; only used by the Darwin symbolizer.
      binary: path of the binary to symbolize.

    Returns:
      A LinuxSymbolizer or DarwinSymbolizer, or None for any other system.
    """
    if system == 'Linux':
        return LinuxSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
| 155 | |
| 156 | |
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that looks addresses up in a Breakpad text symbol file."""

    def __init__(self, filename):
        """Read and index the symbol file.

        Args:
          filename: path of the symbol file; its first line must be the
            MODULE record.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Read through a context manager so the file is closed right away.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        # Source file names; a FILE record's number is its index here.
        self.files = []
        # Function/public symbol address -> symbol name.
        self.symbols = {}
        # Sorted start addresses of all line records (for bisect).
        self.address_list = []
        # Line record start address ->
        #   (function address, size, line number, file number).
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the records that follow the MODULE line.

        Args:
          lines: iterable of record lines.  Blank lines are skipped.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already stored for this address (from a PUBLIC
                # record) is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Find the line record that covers addr.

        Args:
          addr: address (int) to look up.

        Returns:
          A (symbol name, file name, line number) tuple, or None when no
          line record covers addr.
        """
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            # addr lies past the end of that record.
            return None
        return self.symbols[sym_id], self.files[file_no], line_no

    def symbolize(self, prefix, addr, offset):
        """Build the symbolized text for one stack frame.

        Args:
          prefix: leading part of the frame line, copied to the result.
          addr: address string, copied to the result.
          offset: hexadecimal offset string to look up in the symbol file.

        Returns:
          "<prefix><addr> in <function> <file>:<line>", or None when the
          offset is not covered by the symbol file.
        """
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = "%s%s in %s %s:%d" % (
            prefix, addr, function_name, file_name, line_no)
        # Only echo the result in debug mode: the caller prints the returned
        # line itself, so an unconditional print would duplicate it.
        if DEBUG:
            print(result)
        return result
| 224 | |
| 225 | |
def symbolize_line(system, line):
    """Symbolize one line of input if it is a stack frame line.

    Symbolizers are created on demand and kept in the module-level
    `symbolizers` dict, keyed by binary path.  A Breakpad symbolizer is
    preferred when BreakpadSymbolizerFactory() provides one; if it cannot
    resolve a frame (returns None) it is chained with the system symbolizer
    for that binary from then on.

    Args:
      system: operating system name, passed to SystemSymbolizerFactory().
      line: one line of input text.

    Returns:
      The symbolized frame text, or line unchanged when it does not look
      like a stack frame.
    """
    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
    match = re.match(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
    if not match:
        return line
    if DEBUG:
        print(line)
    # group(2) is the frame number; it is already part of the prefix.
    prefix = match.group(1)
    addr = match.group(3)
    binary = match.group(4)
    offset = match.group(5)
    if binary not in symbolizers:
        symbolizers[binary] = (BreakpadSymbolizerFactory(addr, binary) or
                               SystemSymbolizerFactory(system, addr, binary))
    result = symbolizers[binary].symbolize(prefix, addr, offset)
    if result is None:
        symbolizers[binary] = ChainSymbolizer(
            symbolizers[binary],
            SystemSymbolizerFactory(system, addr, binary))
        result = symbolizers[binary].symbolize(prefix, addr, offset)
    # Reuse the result obtained above: symbolizing the same frame a second
    # time would repeat the subprocess traffic and any debug output.
    return result
Alexander Potapenko | 879b1ff | 2012-08-02 14:58:04 +0000 | [diff] [blame] | 251 | |
Alexander Potapenko | be84ac8 | 2012-08-02 13:59:23 +0000 | [diff] [blame] | 252 | |
def main():
    """Filter stdin to stdout, symbolizing the stack frame lines.

    Only runs on Linux and Darwin (as reported by os.uname()); on any other
    system a message naming the system is printed instead.
    """
    system = os.uname()[0]
    if system not in ['Linux', 'Darwin']:
        print('%s %s' % ('Unknown system: ', system))
        return
    for line in sys.stdin:
        print(symbolize_line(system, line).rstrip())


if __name__ == '__main__':
    main()