blob: b9d3ad3ad2fe6b7b9c3b96d9b366149387d16291 [file] [log] [blame]
Kostya Serebryany1e172b42011-11-30 01:07:02 +00001#!/usr/bin/env python
2#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3#
4# The LLVM Compiler Infrastructure
5#
6# This file is distributed under the University of Illinois Open Source
7# License. See LICENSE.TXT for details.
8#
9#===------------------------------------------------------------------------===#
Stephen Hines6d186232014-11-26 17:56:19 -080010import argparse
Alexander Potapenko8e398692012-08-02 13:59:23 +000011import bisect
Kostya Serebryany8e32db42013-02-18 08:02:16 +000012import getopt
Kostya Serebryany1e172b42011-11-30 01:07:02 +000013import os
14import re
Kostya Serebryany1e172b42011-11-30 01:07:02 +000015import subprocess
Alexander Potapenkoce31aa72012-09-26 13:16:42 +000016import sys
Kostya Serebryany1e172b42011-11-30 01:07:02 +000017
# Per-binary cache of symbolizer chains, keyed by the binary path.
symbolizers = {}

# Boolean switches: DEBUG enables diagnostic prints; demangle is forwarded to
# the external symbolizer tools.
DEBUG = demangle = False

# Optional settings; they stay None unless the __main__ block overrides them.
binutils_prefix = sysroot_path = None
binary_name_filter = fix_filename_patterns = None

# Stream the log is read from.
logfile = sys.stdin

# When False, a failure of the non-system symbolizers raises instead of
# falling back to the system symbolizer.
allow_system_symbolizer = True
Alexander Potapenko970a9b92012-07-31 13:51:26 +000027
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
  """Normalize a 'file:line' location string for display.

  Args:
    file_name: location string as reported by a symbolizer.
  Returns:
    The string with every user-supplied prefix pattern (the module-level
    fix_filename_patterns, each treated as a regular expression) cut off,
    ASan runtime sources collapsed to '_asan_rtl_' and 'crtstuff.c:0'
    replaced by '???:0'.
  """
  if fix_filename_patterns:
    for path_to_cut in fix_filename_patterns:
      file_name = re.sub('.*' + path_to_cut, '', file_name)
  # The dots before the file extensions are escaped so that they only match a
  # literal '.', not an arbitrary character.
  file_name = re.sub(r'.*asan_[a-z_]*\.cc:[0-9]*', '_asan_rtl_', file_name)
  file_name = re.sub(r'.*crtstuff\.c:0', '???:0', file_name)
  return file_name
36
def sysroot_path_filter(binary_name):
  """Return binary_name prefixed with the module-level sysroot_path."""
  prefixed_name = sysroot_path + binary_name
  return prefixed_name
39
def guess_arch(addr):
  """Guess the architecture from the textual width of an address.

  Args:
    addr: address string such as '0x7f6e35cf2e45'.
  Returns:
    'x86_64' when the string is longer than 10 characters
    (10 = len('0x') + 8 hex digits), otherwise 'i386'.
  """
  return 'x86_64' if len(addr) > 10 else 'i386'
Alexander Potapenko00424112012-01-26 17:06:50 +000046
class Symbolizer(object):
  """Common interface of all symbolizers; this base resolves nothing."""

  def __init__(self):
    pass

  def symbolize(self, addr, binary, offset):
    """Symbolize the given address (pair of binary and offset).

    Overridden in subclasses.

    Args:
      addr: virtual address of an instruction.
      binary: path to executable/shared object containing this instruction.
      offset: instruction offset in the @binary.
    Returns:
      list of strings (one string for each inlined frame) describing
      the code locations for this instruction (that is, function name, file
      name, line and column numbers).  The base implementation always
      returns None.
    """
    return None
Alexander Potapenko897e89f2012-08-02 14:58:04 +000065
66
class LLVMSymbolizer(Symbolizer):
  """Symbolizer that queries a long-running llvm-symbolizer child process."""

  def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
    """Start the symbolizer process.

    Args:
      symbolizer_path: executable to launch.
      default_arch: value passed as --default-arch.
      system: OS name; .dSYM hints are only passed when it is 'Darwin'.
      dsym_hints: optional iterable of .dSYM hints.  Defaults to no hints.
    """
    super(LLVMSymbolizer, self).__init__()
    self.symbolizer_path = symbolizer_path
    self.default_arch = default_arch
    self.system = system
    # None (rather than a literal []) is the default so that instances never
    # share one mutable default object.
    self.dsym_hints = [] if dsym_hints is None else dsym_hints
    self.pipe = self.open_llvm_symbolizer()

  def open_llvm_symbolizer(self):
    """Launch the symbolizer; return the Popen object, or None on OSError."""
    cmd = [self.symbolizer_path,
           '--use-symbol-table=true',
           '--demangle=%s' % demangle,
           '--functions=short',
           '--inlining=true',
           '--default-arch=%s' % self.default_arch]
    if self.system == 'Darwin':
      for hint in self.dsym_hints:
        cmd.append('--dsym-hint=%s' % hint)
    if DEBUG:
      print(' '.join(cmd))
    try:
      result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
    except OSError:
      # The tool could not be started; symbolize() will then return None.
      result = None
    return result

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize.

    Returns:
      A list with one '<addr> in <function> <file>' string per non-trivial
      frame, or None when the process is unavailable, the exchange fails or
      nothing useful was reported.
    """
    if not self.pipe:
      return None
    result = []
    try:
      symbolizer_input = '"%s" %s' % (binary, offset)
      if DEBUG:
        print(symbolizer_input)
      self.pipe.stdin.write(symbolizer_input + '\n')
      # Make sure the request reaches the child before we block on its reply.
      self.pipe.stdin.flush()
      # The reply is read as (function, location) line pairs until an empty
      # line is seen.
      while True:
        function_name = self.pipe.stdout.readline().rstrip()
        if not function_name:
          break
        file_name = self.pipe.stdout.readline().rstrip()
        file_name = fix_filename(file_name)
        if (not function_name.startswith('??') or
            not file_name.startswith('??')):
          # Append only non-trivial frames.
          result.append('%s in %s %s' % (addr, function_name, file_name))
    except Exception:
      # Deliberately best-effort: any failure means "no result", which lets
      # the caller try another symbolizer.
      result = []
    if not result:
      result = None
    return result
121
122
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
  """Create an LLVMSymbolizer, locating the tool through the environment.

  The executable is taken from LLVM_SYMBOLIZER_PATH, then from
  ASAN_SYMBOLIZER_PATH; if neither is set, 'llvm-symbolizer' is assumed to be
  in PATH.

  Args:
    system: OS name forwarded to LLVMSymbolizer.
    default_arch: architecture forwarded to LLVMSymbolizer.
    dsym_hints: optional iterable of .dSYM hints.  Defaults to no hints.
  """
  # None (rather than a literal []) is the default so that calls never share
  # one mutable default object.
  if dsym_hints is None:
    dsym_hints = []
  symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                     os.getenv('ASAN_SYMBOLIZER_PATH') or
                     'llvm-symbolizer')
  return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
Alexey Samsonov63e4df42012-09-19 08:49:53 +0000131
132
class Addr2LineSymbolizer(Symbolizer):
  """Symbolizer that drives one addr2line process bound to one binary."""

  def __init__(self, binary):
    super(Addr2LineSymbolizer, self).__init__()
    self.binary = binary
    self.pipe = self.open_addr2line()

  def open_addr2line(self):
    """Start addr2line for self.binary and return the Popen object."""
    # The optional binutils prefix selects a cross-toolchain addr2line.
    tool = (binutils_prefix or '') + 'addr2line'
    command = [tool, '-f']
    if demangle:
      command.append('--demangle')
    command.extend(['-e', self.binary])
    if DEBUG:
      print(' '.join(command))
    return subprocess.Popen(command,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if binary != self.binary:
      # This instance only knows about the binary it was created for.
      return None
    try:
      self.pipe.stdin.write('%s\n' % offset)
      function_name = self.pipe.stdout.readline().rstrip()
      file_name = self.pipe.stdout.readline().rstrip()
    except Exception:
      # Any failure yields an empty function and file name.
      function_name = ''
      file_name = ''
    location = fix_filename(file_name)
    return ['%s in %s %s' % (addr, function_name, location)]
Alexander Potapenko00424112012-01-26 17:06:50 +0000165
166
class UnbufferedLineConverter(object):
  """
  Wrap a child process that responds to each line of input with one line of
  output.  Uses pty to trick the child into providing unbuffered output.
  """
  def __init__(self, args, close_stderr=False):
    """Fork `args` on a pseudo-terminal.

    Args:
      args: command line as a list; args[0] is the program to execute.
      close_stderr: when True the child's stderr is redirected to /dev/null.
    """
    # Local imports so that the script can start on Windows.
    import pty
    import termios
    pid, fd = pty.fork()
    if pid == 0:
      # We're the child. Transfer control to command.
      try:
        if close_stderr:
          # Open write-only so that the child's writes to stderr succeed and
          # are discarded; release the temporary descriptor afterwards.
          dev_null = os.open('/dev/null', os.O_WRONLY)
          os.dup2(dev_null, 2)
          os.close(dev_null)
        os.execvp(args[0], args)
      finally:
        # Only reached when the exec (or the redirection) failed.  Leave
        # immediately so that the forked copy never continues running the
        # parent's Python code.
        os._exit(127)
    else:
      # Disable echoing.
      attr = termios.tcgetattr(fd)
      attr[3] = attr[3] & ~termios.ECHO
      termios.tcsetattr(fd, termios.TCSANOW, attr)
      # Set up a file()-like interface to the child process
      self.r = os.fdopen(fd, "r", 1)
      self.w = os.fdopen(os.dup(fd), "w", 1)

  def convert(self, line):
    """Send one line to the child and return its next output line."""
    self.w.write(line + "\n")
    return self.readline()

  def readline(self):
    """Read one line from the child, with trailing whitespace removed."""
    return self.r.readline().rstrip()
198
199
class DarwinSymbolizer(Symbolizer):
  """Symbolizer that drives an `atos` process for a single binary."""

  def __init__(self, addr, binary):
    super(DarwinSymbolizer, self).__init__()
    self.binary = binary
    # The architecture is guessed from the width of the first address seen.
    self.arch = guess_arch(addr)
    self.open_atos()

  def open_atos(self):
    """Start atos for self.binary behind an UnbufferedLineConverter."""
    if DEBUG:
      print('atos -o %s -arch %s' % (self.binary, self.arch))
    self.atos = UnbufferedLineConverter(
        ['atos', '-o', self.binary, '-arch', self.arch], close_stderr=True)

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if binary != self.binary:
      return None
    reply = self.atos.convert('0x%x' % int(offset, 16))
    # Skip lines containing "got symbolicator for" until the real reply.
    while "got symbolicator for" in reply:
      reply = self.atos.readline()
    # A well-formed atos response looks like this:
    #   foo(type1, type2) (in object.name) (filename.cc:80)
    parsed = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', reply)
    if DEBUG:
      print(' '.join(['atos_line: ', reply]))
    if not parsed:
      # Unrecognized response: report the raw line.
      return ['%s in %s' % (addr, reply)]
    # Drop the parenthesized argument list from the function name.
    function_name = re.sub(r'\(.*?\)', '', parsed.group(1))
    file_name = fix_filename(parsed.group(3))
    return ['%s in %s %s' % (addr, function_name, file_name)]
Kostya Serebryany1e172b42011-11-30 01:07:02 +0000232
Alexander Potapenko970a9b92012-07-31 13:51:26 +0000233
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
  """Tries each symbolizer of a list in order until one yields a result."""

  def __init__(self, symbolizer_list):
    super(ChainSymbolizer, self).__init__()
    self.symbolizer_list = symbolizer_list

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    for candidate in self.symbolizer_list:
      if not candidate:
        # Falsy entries (e.g. None) are skipped.
        continue
      frames = candidate.symbolize(addr, binary, offset)
      if frames:
        return frames
    return None

  def append_symbolizer(self, symbolizer):
    """Add a symbolizer at the end of the chain."""
    self.symbolizer_list.append(symbolizer)
Alexander Potapenko8e398692012-08-02 13:59:23 +0000252
253
def BreakpadSymbolizerFactory(binary):
  """Return a BreakpadSymbolizer for `binary`, or None if unavailable.

  The symbol file name is `binary` plus the value of the BREAKPAD_SUFFIX
  environment variable; None is returned when the variable is unset/empty or
  the file does not exist.
  """
  suffix = os.getenv('BREAKPAD_SUFFIX')
  if not suffix:
    return None
  symbol_file = binary + suffix
  if not os.access(symbol_file, os.F_OK):
    return None
  return BreakpadSymbolizer(symbol_file)
261
262
def SystemSymbolizerFactory(system, addr, binary):
  """Create the platform's native symbolizer for `binary`.

  Args:
    system: OS name, e.g. 'Darwin', 'Linux' or 'FreeBSD'.
    addr: an address from the trace (used on Darwin to guess the arch).
    binary: path of the binary to symbolize.
  Returns:
    A DarwinSymbolizer on Darwin, an Addr2LineSymbolizer on Linux and
    FreeBSD, and None for any other system.
  """
  if system == 'Darwin':
    return DarwinSymbolizer(addr, binary)
  # FreeBSD is accepted by SymbolizationLoop, so it needs a system symbolizer
  # as well; it uses addr2line like Linux.
  if system in ('Linux', 'FreeBSD'):
    return Addr2LineSymbolizer(binary)
  return None
Alexander Potapenko897e89f2012-08-02 14:58:04 +0000268
269
class BreakpadSymbolizer(Symbolizer):
  """Symbolizer that answers from a Breakpad text symbol file."""

  def __init__(self, filename):
    """Load and index the symbol file `filename`."""
    super(BreakpadSymbolizer, self).__init__()
    self.filename = filename
    # Read through a context manager so the file handle is always closed.
    with open(filename) as symbol_file:
      lines = symbol_file.readlines()
    self.files = []
    self.symbols = {}
    self.address_list = []
    self.addresses = {}
    # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
    fragments = lines[0].rstrip().split()
    self.arch = fragments[2]
    self.debug_id = fragments[3]
    self.binary = ' '.join(fragments[4:])
    self.parse_lines(lines[1:])

  def parse_lines(self, lines):
    """Index the FILE, PUBLIC, FUNC and line records of the symbol file."""
    cur_function_addr = ''
    for line in lines:
      fragments = line.split()
      if not fragments:
        # Blank line: nothing to index.
        continue
      record = fragments[0]
      if record == 'FILE':
        assert int(fragments[1]) == len(self.files)
        self.files.append(' '.join(fragments[2:]))
      elif record == 'PUBLIC':
        self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
      elif record in ('CFI', 'STACK'):
        pass
      elif record == 'FUNC':
        cur_function_addr = int(fragments[1], 16)
        # A name already recorded for this address is kept.
        if cur_function_addr not in self.symbols:
          self.symbols[cur_function_addr] = ' '.join(fragments[4:])
      else:
        # Line starting with an address.
        addr = int(record, 16)
        self.address_list.append(addr)
        # Tuple of symbol address, size, line, file number.
        self.addresses[addr] = (cur_function_addr,
                                int(fragments[1], 16),
                                int(fragments[2]),
                                int(fragments[3]))
    self.address_list.sort()

  def get_sym_file_line(self, addr):
    """Return (symbol, filename, line) covering `addr`, or None."""
    if addr in self.addresses:
      key = addr
    else:
      # Use the closest recorded address below `addr`, if there is one.
      index = bisect.bisect_left(self.address_list, addr)
      if index == 0:
        return None
      key = self.address_list[index - 1]
    sym_id, size, line_no, file_no = self.addresses[key]
    symbol = self.symbols[sym_id]
    filename = self.files[file_no]
    if addr < key + size:
      return symbol, filename, line_no
    return None

  def symbolize(self, addr, binary, offset):
    """Overrides Symbolizer.symbolize."""
    if self.binary != binary:
      return None
    res = self.get_sym_file_line(int(offset, 16))
    if not res:
      return None
    function_name, file_name, line_no = res
    result = ['%s in %s %s:%d' % (
        addr, function_name, file_name, line_no)]
    # Diagnostic output only; it must not pollute the symbolized log.
    if DEBUG:
      print(result)
    return result
342
343
class SymbolizationLoop(object):
  """Reads a log line by line and rewrites stack frames in symbolized form."""

  def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
    """Set up the per-platform line processor.

    Args:
      binary_name_filter: optional callable mapping a binary name from the
        log to the name that should be symbolized.
      dsym_hint_producer: optional callable returning the .dSYM hints for a
        binary (only consulted on Darwin).
    Raises:
      Exception: on a non-Windows system other than Linux, Darwin or FreeBSD.
    """
    if sys.platform == 'win32':
      # ASan on Windows uses dbghelp.dll to symbolize in-process, which works
      # even in sandboxed processes. Nothing needs to be done here.
      self.process_line = self.process_line_echo
    else:
      # Used by clients who may want to supply a different binary name.
      # E.g. in Chrome several binaries may share a single .dSYM.
      self.binary_name_filter = binary_name_filter
      self.dsym_hint_producer = dsym_hint_producer
      self.system = os.uname()[0]
      if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
        raise Exception('Unknown system')
      self.llvm_symbolizers = {}
      self.last_llvm_symbolizer = None
      self.dsym_hints = set()
      self.frame_no = 0
      self.process_line = self.process_line_posix

  def symbolize_address(self, addr, binary, offset):
    """Return the list of symbolized frames for one address.

    Raises:
      Exception: if nothing could symbolize the address and the system
        symbolizer is not allowed.
    """
    # On non-Darwin (i.e. on platforms without .dSYM debug info) always use
    # a single symbolizer binary.
    # On Darwin, if the dsym hint producer is present:
    #  1. check whether we've seen this binary already; if so,
    #     use |llvm_symbolizers[binary]|, which has already loaded the debug
    #     info for this binary (might not be the case for
    #     |last_llvm_symbolizer|);
    #  2. otherwise check if we've seen all the hints for this binary already;
    #     if so, reuse |last_llvm_symbolizer| which has the full set of hints;
    #  3. otherwise create a new symbolizer and pass all currently known
    #     .dSYM hints to it.
    if binary not in self.llvm_symbolizers:
      use_new_symbolizer = True
      if self.system == 'Darwin' and self.dsym_hint_producer:
        dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
        use_new_symbolizer = bool(dsym_hints_for_binary - self.dsym_hints)
        self.dsym_hints |= dsym_hints_for_binary
      if self.last_llvm_symbolizer and not use_new_symbolizer:
        self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
      else:
        self.last_llvm_symbolizer = LLVMSymbolizerFactory(
            self.system, guess_arch(addr), self.dsym_hints)
        self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
    # Use the chain of symbolizers:
    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    # (fall back to next symbolizer if the previous one fails).
    if binary not in symbolizers:
      symbolizers[binary] = ChainSymbolizer(
          [BreakpadSymbolizerFactory(binary), self.llvm_symbolizers[binary]])
    result = symbolizers[binary].symbolize(addr, binary, offset)
    if result is None:
      if not allow_system_symbolizer:
        raise Exception('Failed to launch or use llvm-symbolizer.')
      # Initialize system symbolizer only if other symbolizers failed.
      symbolizers[binary].append_symbolizer(
          SystemSymbolizerFactory(self.system, addr, binary))
      result = symbolizers[binary].symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

  def get_symbolized_lines(self, symbolized_lines):
    """Format symbolized frames, numbering them from self.frame_no.

    Returns [self.current_line] unchanged when there is nothing symbolized.
    """
    if not symbolized_lines:
      return [self.current_line]
    result = []
    for symbolized_frame in symbolized_lines:
      result.append(' #%s %s' % (str(self.frame_no),
                                 symbolized_frame.rstrip()))
      self.frame_no += 1
    return result

  def process_logfile(self):
    """Process every line of the module-level logfile and print the result."""
    self.frame_no = 0
    for line in logfile:
      processed = self.process_line(line)
      print('\n'.join(processed))

  def process_line_echo(self, line):
    """Return the line unchanged apart from trailing whitespace."""
    return [line.rstrip()]

  def process_line_posix(self, line):
    """Symbolize `line` if it is a stack frame; otherwise return it as is."""
    self.current_line = line.rstrip()
    #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
    stack_trace_line_format = (
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
    match = re.match(stack_trace_line_format, line)
    if not match:
      return [self.current_line]
    if DEBUG:
      print(line)
    _, frameno_str, addr, binary, offset = match.groups()
    if frameno_str == '0':
      # Assume that frame #0 is the first frame of new stack trace.
      self.frame_no = 0
    original_binary = binary
    if self.binary_name_filter:
      binary = self.binary_name_filter(binary)
    symbolized_line = self.symbolize_address(addr, binary, offset)
    if not symbolized_line:
      if original_binary != binary:
        # The filtered name gave nothing: retry with the name from the log.
        symbolized_line = self.symbolize_address(addr, original_binary,
                                                 offset)
    return self.get_symbolized_lines(symbolized_line)
Alexander Potapenko970a9b92012-07-31 13:51:26 +0000447
448
if __name__ == '__main__':
  # Command-line entry point: parse the options, store them in the
  # module-level settings and symbolize the selected log.
  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description='ASan symbolization script',
      epilog='Example of use:\n'
             'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
             '-s "$HOME/SymbolFiles" < asan.log')
  parser.add_argument(
      'path_to_cut', nargs='*',
      help='pattern to be cut from the result file path ')
  parser.add_argument(
      '-d', '--demangle', action='store_true',
      help='demangle function names')
  parser.add_argument(
      '-s', metavar='SYSROOT',
      help='set path to sysroot for sanitized binaries')
  parser.add_argument(
      '-c', metavar='CROSS_COMPILE',
      help='set prefix for binutils')
  parser.add_argument(
      '-l', '--logfile', default=sys.stdin,
      type=argparse.FileType('r'),
      help='set log file name to parse, default is stdin')
  args = parser.parse_args()

  # Each option only overrides its module-level default when it was given.
  if args.path_to_cut:
    fix_filename_patterns = args.path_to_cut
  if args.demangle:
    demangle = True
  if args.s:
    binary_name_filter = sysroot_path_filter
    sysroot_path = args.s
  if args.c:
    binutils_prefix = args.c
  logfile = args.logfile if args.logfile else sys.stdin

  loop = SymbolizationLoop(binary_name_filter)
  loop.process_logfile()
482 loop.process_logfile()