Kostya Serebryany | 1e172b4 | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| 3 | # |
| 4 | # The LLVM Compiler Infrastructure |
| 5 | # |
| 6 | # This file is distributed under the University of Illinois Open Source |
| 7 | # License. See LICENSE.TXT for details. |
| 8 | # |
| 9 | #===------------------------------------------------------------------------===# |
Alexander Potapenko | 8e39869 | 2012-08-02 13:59:23 +0000 | [diff] [blame] | 10 | import bisect |
Kostya Serebryany | 1e172b4 | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 11 | import os |
| 12 | import re |
| 13 | import sys |
Kostya Serebryany | 1e172b4 | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 14 | import subprocess |
| 15 | |
# Shared LLVMSymbolizer instance; stays None until main() assigns it.
llvm_symbolizer = None
# Cache of symbolizer chains keyed by binary path; filled lazily by
# symbolize_address().
symbolizers = {}
# The two dictionaries below are not referenced by the code visible in this
# file.
filetypes = {}
vmaddrs = {}
# When True, the symbolizers echo the commands they launch and the input
# lines they parse.
DEBUG = False
Kostya Serebryany | 1e172b4 | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 21 | |
Alexander Potapenko | 970a9b9 | 2012-07-31 13:51:26 +0000 | [diff] [blame] | 22 | |
def fix_filename(file_name, paths_to_cut=None):
    """Shorten a "file:line" location string for display.

    Args:
      file_name: source location string as produced by a symbolizer.
      paths_to_cut: optional iterable of regular-expression fragments. For
        each fragment, everything up to and including its match is removed
        from file_name. Defaults to the command-line arguments
        (sys.argv[1:]).
    Returns:
      The rewritten location string.
    """
    if paths_to_cut is None:
        paths_to_cut = sys.argv[1:]
    for path_to_cut in paths_to_cut:
        # The fragment is used as a regular expression (it is not escaped),
        # so existing invocations keep matching exactly as before.
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    # Collapse locations inside asan_*.cc files into a single marker. The
    # dots are escaped so that only a literal ".cc" / ".c" extension matches.
    file_name = re.sub(r".*asan_[a-z_]*\.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
    return file_name
| 29 | |
| 30 | |
class Symbolizer(object):
    """Interface shared by all symbolizer backends in this file."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Describe the code location of one instruction.

        Overridden in subclasses; the base implementation resolves nothing.

        Args:
          addr: virtual address of an instruction.
          binary: path to executable/shared object containing this
            instruction.
          offset: instruction offset in the @binary.
        Returns:
          Subclasses return a list of strings (one string for each inlined
          frame) describing the code locations for this instruction (that
          is, function name, file name, line and column numbers). This base
          version always returns None.
        """
        return None
Alexander Potapenko | 897e89f | 2012-08-02 14:58:04 +0000 | [diff] [blame] | 47 | |
| 48 | |
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to one long-lived llvm-symbolizer process.

    Requests are written to the child's stdin as "<binary> <offset>" lines;
    the reply is read back from its stdout.
    """

    def __init__(self, symbolizer_path):
        """
        Args:
          symbolizer_path: name or path of the llvm-symbolizer executable.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        # None when the tool could not be launched; symbolize() then returns
        # None so that a ChainSymbolizer can try the next backend.
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch llvm-symbolizer with pipes attached to stdin and stdout.

        Returns:
          The subprocess.Popen object, or None if the executable could not
          be started (for example, it is not installed).
        """
        cmd = [self.symbolizer_path,
               # FIXME: Remove this when libObject is fixed.
               "--use-symbol-table=false",
               "--demangle=false",
               "--functions=true",
               "--inlining=true"]
        if DEBUG:
            print(' '.join(cmd))
        try:
            # universal_newlines makes the pipes text-mode on Python 3; on
            # Python 2 it only normalizes line endings on stdout.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            # A missing tool must not abort the whole script.
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          A list of "<addr> in <function> <file>" strings, one per frame
          reported by llvm-symbolizer, or None if no valid frame was
          produced.
        """
        if self.pipe is None:
            return None
        result = []
        try:
            self.pipe.stdin.write("%s %s\n" % (binary, offset))
            # Push the request out now; the reply is read synchronously.
            self.pipe.stdin.flush()
            while True:
                # The reply is a sequence of (function, file) line pairs
                # terminated by an empty line (EOF also reads as empty).
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = fix_filename(self.pipe.stdout.readline().rstrip())
                if (not function_name.startswith("??") and
                        not file_name.startswith("??")):
                    # Append only valid frames.
                    result.append("%s in %s %s" % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberately best-effort: any failure means "not symbolized",
            # which lets the caller fall back to another symbolizer.
            result = []
        return result or None
| 88 | |
| 89 | |
def LLVMSymbolizerFactory(system):
    """Create the LLVM-based symbolizer for the given system.

    Args:
      system: operating system name; only "Linux" gets an LLVMSymbolizer.
    Returns:
      An LLVMSymbolizer on Linux, None for any other system.
    """
    if system != "Linux":
        return None
    # Use $LLVM_SYMBOLIZER_PATH when set and non-empty; otherwise assume
    # llvm-symbolizer is in PATH.
    symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH") or "llvm-symbolizer"
    return LLVMSymbolizer(symbolizer_path)
| 98 | |
| 99 | |
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives one `addr2line -f -e <binary>` process."""

    def __init__(self, binary):
        """
        Args:
          binary: path of the only binary this instance can symbolize.
        """
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start addr2line for self.binary and return the Popen object."""
        command = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(command))
        return subprocess.Popen(command,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None when @binary is not the binary this instance was created
          for; otherwise a one-element list "<addr> in <function> <file>".
          If talking to addr2line fails, both names are left empty.
        """
        if binary != self.binary:
            return None
        try:
            self.pipe.stdin.write("%s\n" % (offset,))
            # addr2line -f answers with two lines: function, then file:line.
            names = [self.pipe.stdout.readline().rstrip() for _ in (0, 1)]
        except Exception:
            names = ["", ""]
        function_name, file_name = names
        return ["%s in %s %s" % (addr, function_name,
                                 fix_filename(file_name))]
Alexander Potapenko | 0042411 | 2012-01-26 17:06:50 +0000 | [diff] [blame] | 124 | |
| 125 | |
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that shells out to `otool` and `atos`."""

    def __init__(self, addr, binary):
        """
        Args:
          addr: a sample address string, used only to guess the
            architecture.
          binary: path of the only binary this instance can symbolize.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        self.arch = "x86_64" if len(addr) > 10 else "i386"
        # Cached result of get_binary_vmaddr(); None means "not computed".
        self.vmaddr = None
        # Popen object of the most recently started atos process.
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000

        Returns:
          The vmaddr of the __TEXT segment as an int, or 0 if none is found.
        """
        # Compare against None: 0 is a legitimate vmaddr and must be cached
        # too, otherwise otool would be re-run on every call.
        if self.vmaddr is not None:
            return self.vmaddr
        # stdin is a pipe so that the child does not inherit this script's
        # stdin.
        otool = subprocess.Popen(["otool", "-l", self.binary],
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
        # communicate() closes stdin, drains stdout and reaps the child.
        output = otool.communicate()[0]
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                vmaddr = int(line.split(' ')[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send offset plus the binary's slide to atos as a hex line.

        Args:
          offset: hexadecimal offset string.
        """
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start a fresh atos process for self.binary, stored in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None when @binary is not the binary this instance was created
          for; otherwise a one-element list. It holds
          "<addr> in <function> <file>" for a well-formed atos reply and
          "<addr> in <raw atos line>" for anything else.
        """
        if self.binary != binary:
            return None
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # communicate() closes stdin (atos then answers and exits), drains
        # stdout and stderr and waits for the child, so no process or pipe
        # is left behind for each symbolized address.
        output = self.pipe.communicate()[0]
        atos_line = output.split("\n", 1)[0].rstrip()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if not match:
            return ["%s in %s" % (addr, atos_line)]
        # Drop the parenthesized argument list from the function name.
        function_name = re.sub(r"\(.*?\)", "", match.group(1))
        file_name = fix_filename(match.group(3))
        return ["%s in %s %s" % (addr, function_name, file_name)]
Kostya Serebryany | 1e172b4 | 2011-11-30 01:07:02 +0000 | [diff] [blame] | 197 | |
Alexander Potapenko | 970a9b9 | 2012-07-31 13:51:26 +0000 | [diff] [blame] | 198 | |
Alexey Samsonov | 63e4df4 | 2012-09-19 08:49:53 +0000 | [diff] [blame^] | 199 | # Chain several symbolizers so that if one symbolizer fails, we fall back |
| 200 | # to the next symbolizer in chain. |
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order and returns the first answer."""

    def __init__(self, symbolizer_list):
        """
        Args:
          symbolizer_list: list of symbolizers to consult, in priority
            order; falsy entries (e.g. None) are skipped. The list object
            itself is kept and later extended by append_symbolizer().
        """
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          The first truthy result produced by a member of the chain, or
          None if every member failed.
        """
        for candidate in self.symbolizer_list:
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add @symbolizer at the end of the chain (lowest priority)."""
        self.symbolizer_list.append(symbolizer)
Alexander Potapenko | 8e39869 | 2012-08-02 13:59:23 +0000 | [diff] [blame] | 215 | |
| 216 | |
def BreakpadSymbolizerFactory(addr, binary):
    """Create a BreakpadSymbolizer for @binary if a symbol file exists.

    The symbol file path is @binary with the value of the BREAKPAD_SUFFIX
    environment variable appended.

    Args:
      addr: unused here.
      binary: path of the binary being symbolized.
    Returns:
      A BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset/empty or
      the symbol file does not exist.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
| 224 | |
| 225 | |
def SystemSymbolizerFactory(system, addr, binary):
    """Create the symbolizer based on the platform's native tools.

    Args:
      system: operating system name ('Linux' or 'Darwin').
      addr: sample address string, forwarded to DarwinSymbolizer.
      binary: path of the binary to symbolize.
    Returns:
      An Addr2LineSymbolizer on Linux, a DarwinSymbolizer on Darwin, and
      None for any other system.
    """
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
Alexander Potapenko | 897e89f | 2012-08-02 14:58:04 +0000 | [diff] [blame] | 231 | |
| 232 | |
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file.

        Args:
          filename: path of the Breakpad symbol file; its first line must
            be the MODULE record.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Read everything up front and close the file right away.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # FILE records, indexed by file number.
        self.symbols = {}        # function/public address -> symbol name.
        self.address_list = []   # sorted start addresses of line records.
        self.addresses = {}      # start address -> line record tuple.
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the records that follow the MODULE line.

        Args:
          lines: iterable of text lines from the symbol file.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to index.
                continue
            record = fragments[0]
            if record == 'FILE':
                # File numbers must be consecutive because self.files is
                # indexed by position.
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # Keep a name already registered for this address.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Find the line record covering @addr.

        Args:
          addr: integer offset inside the binary.
        Returns:
          A (symbol, filename, line_no) tuple, or None when no line record
          covers @addr.
        """
        # The covering record, if any, is the one with the greatest start
        # address that is <= addr.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None when @binary differs from the name in the MODULE record or
          the offset is not covered; otherwise a one-element list
          "<addr> in <function> <file>:<line>".
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ["%s in %s %s:%d" % (
            addr, function_name, file_name, line_no)]
        # Diagnostic only: printing unconditionally would duplicate every
        # frame on stdout.
        if DEBUG:
            print(result)
        return result
| 302 | |
| 303 | |
def symbolize_address(system, addr, binary, offset):
    """Symbolize one address using the per-binary chain of symbolizers.

    The chain is: Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos,
    falling back to the next symbolizer if the previous one fails.

    Args:
      system: operating system name, used to pick the system symbolizer.
      addr: virtual address string from the stack trace.
      binary: path of the module containing the address.
      offset: hexadecimal offset of the address inside @binary.
    Returns:
      A non-empty list of frame description strings.
    """
    if binary not in symbolizers:
        symbolizers[binary] = ChainSymbolizer(
            [BreakpadSymbolizerFactory(addr, binary), llvm_symbolizer])
    chain = symbolizers[binary]
    frames = chain.symbolize(addr, binary, offset)
    if frames is None:
        # Initialize system symbolizer only if other symbolizers failed.
        chain.append_symbolizer(SystemSymbolizerFactory(system, addr, binary))
        frames = chain.symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert(frames)
    return frames
Alexander Potapenko | 8e39869 | 2012-08-02 13:59:23 +0000 | [diff] [blame] | 320 | |
def main():
    """Filter stdin to stdout, symbolizing stack-trace frame lines.

    Lines that do not look like a stack frame are echoed unchanged (minus
    trailing whitespace). Each frame line is replaced by one output line
    per symbolized frame, renumbered from 0 at every frame "#0". On systems
    other than Linux and Darwin only an "Unknown system" message is
    printed.
    """
    # Local import: platform.system() reports the same name as
    # os.uname()[0] on POSIX systems and also works where os.uname() does
    # not exist, so the "Unknown system" message can actually be reached.
    import platform
    global llvm_symbolizer
    system = platform.system()
    llvm_symbolizer = LLVMSymbolizerFactory(system)
    if system not in ('Linux', 'Darwin'):
        print('Unknown system:  %s' % system)
        return
    #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    # Compiled once instead of being rebuilt for every input line.
    stack_trace_line_format = re.compile(
        r"^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)")
    frame_no = 0
    for line in sys.stdin:
        match = stack_trace_line_format.match(line)
        if not match:
            print(line.rstrip())
            continue
        if DEBUG:
            print(line)
        frameno_str, addr, binary, offset = match.group(2, 3, 4, 5)
        if frameno_str == "0":
            # Assume that frame #0 is the first frame of new stack trace.
            frame_no = 0
        symbolized_line = symbolize_address(system, addr, binary, offset)
        if not symbolized_line:
            print(line.rstrip())
            continue
        # One input frame can expand into several output frames.
        for symbolized_frame in symbolized_line:
            print(" #" + str(frame_no) + " " + symbolized_frame.rstrip())
            frame_no += 1
| 350 | |
| 351 | |
# Run the filter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()