blob: e6d43cd3f4ad7d0c0d4f946b3a7feb52fae9d5e0 [file] [log] [blame]
Kostya Serebryany1e172b42011-11-30 01:07:02 +00001#!/usr/bin/env python
2#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
3#
4# The LLVM Compiler Infrastructure
5#
6# This file is distributed under the University of Illinois Open Source
7# License. See LICENSE.TXT for details.
8#
9#===------------------------------------------------------------------------===#
Stephen Hines6d186232014-11-26 17:56:19 -080010import argparse
Alexander Potapenko8e398692012-08-02 13:59:23 +000011import bisect
Kostya Serebryany8e32db42013-02-18 08:02:16 +000012import getopt
Kostya Serebryany1e172b42011-11-30 01:07:02 +000013import os
14import re
Kostya Serebryany1e172b42011-11-30 01:07:02 +000015import subprocess
Alexander Potapenkoce31aa72012-09-26 13:16:42 +000016import sys
Kostya Serebryany1e172b42011-11-30 01:07:02 +000017
# Cache of symbolizer chains, keyed by binary path (filled in by
# SymbolizationLoop.symbolize_address).
symbolizers = {}
# When true, helper command lines and exchanged lines are echoed to stdout.
DEBUG = False

# Module-level settings read by the functions and classes below.
demangle = False
binutils_prefix = None
sysroot_path = None
binary_name_filter = None
fix_filename_patterns = None
logfile = sys.stdin
# When false, a failure of the other symbolizers raises instead of falling
# back to the system symbolizer.
allow_system_symbolizer = True
Alexander Potapenko970a9b92012-07-31 13:51:26 +000027
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Shorten a source location string before it is printed.

    Args:
      file_name: location text as produced by a symbolizer.
    Returns:
      The location with every configured prefix pattern removed, ASan
      runtime sources collapsed to '_asan_rtl_' and 'crtstuff.c:0'
      replaced by '???:0'.
    """
    if fix_filename_patterns:
        # Each pattern is treated as a regular expression; everything up to
        # and including its last match is cut.
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    # The dots are escaped so that only real '.cc' / '.c' suffixes match.
    file_name = re.sub(r'.*asan_[a-z_]*\.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub(r'.*crtstuff\.c:0', '???:0', file_name)
    return file_name
36
def sysroot_path_filter(binary_name):
    """Return binary_name prefixed with the module-level sysroot_path."""
    rooted_name = sysroot_path + binary_name
    return rooted_name
39
def guess_arch(addr):
    """Guess the architecture from the textual width of an address.

    Anything longer than '0x' plus 8 hex digits (10 characters) is taken to
    be a 64-bit address.
    """
    return 'x86_64' if len(addr) > 10 else 'i386'
Alexander Potapenko00424112012-01-26 17:06:50 +000046
class Symbolizer(object):
    """Base class of all symbolizers; symbolizes nothing by itself."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers). The base implementation
            always returns None.
        """
        return None
Alexander Potapenko897e89f2012-08-02 14:58:04 +000065
66
class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to a long-running llvm-symbolizer child process."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        """Store the configuration and start the llvm-symbolizer process.

        Args:
            symbolizer_path: executable to launch.
            default_arch: value passed as --default-arch.
            system: OS name; .dSYM hints are only passed on 'Darwin'.
            dsym_hints: optional iterable of --dsym-hint values.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # None instead of a literal [] default, which would be one list
        # shared by every instance.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Launch llvm-symbolizer; return the Popen object or None on OSError."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=linkage',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            cmd.extend('--dsym-hint=%s' % hint for hint in self.dsym_hints)
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Text mode so that str can be written/read on Python 3 as well.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            # The executable could not be started; symbolize() will then
            # return None for every request.
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            self.pipe.stdin.flush()
            # The reply is read as (function, location) line pairs until an
            # empty line is seen.
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = fix_filename(self.pipe.stdout.readline().rstrip())
                if not (function_name.startswith('??') and
                        file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberately best-effort: any failure is reported as "no
            # result" by returning None.
            result = []
        return result or None
121
122
def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the executable via the environment.

    The path is taken from LLVM_SYMBOLIZER_PATH, then ASAN_SYMBOLIZER_PATH;
    if neither is set (or both are empty) 'llvm-symbolizer' is assumed to be
    in PATH.

    Args:
        system: OS name forwarded to LLVMSymbolizer.
        default_arch: architecture forwarded to LLVMSymbolizer.
        dsym_hints: optional iterable of .dSYM hints; a new empty list is
            used when omitted (a literal [] default would be shared between
            calls).
    """
    if dsym_hints is None:
        dsym_hints = []
    symbolizer_path = (os.getenv('LLVM_SYMBOLIZER_PATH') or
                       os.getenv('ASAN_SYMBOLIZER_PATH') or
                       # Assume llvm-symbolizer is in PATH.
                       'llvm-symbolizer')
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)
Alexey Samsonov63e4df42012-09-19 08:49:53 +0000131
132
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer that drives one addr2line process for a single binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Written after every offset; its reply marks the end of the frames
        # that belong to the real request.
        self.output_terminator = -1

    def open_addr2line(self):
        """Start addr2line (optionally prefixed by binutils_prefix)."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-fi']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Text mode so that str can be written/read on Python 3 as well.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this instance was
        created for; otherwise a non-empty list with one entry per frame.
        """
        if self.binary != binary:
            return None
        lines = []
        try:
            self.pipe.stdin.write('%s\n%s\n' % (offset,
                                                self.output_terminator))
            self.pipe.stdin.flush()
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                file_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    # End of output: the process produced nothing more.
                    break
                if lines and function_name == '??':
                    # Reply to the terminator. Its location line is not
                    # '??', so it must not be compared with function_name;
                    # the first frame is always kept, even when unknown.
                    break
                lines.append((function_name, file_name))
        except Exception:
            # Best-effort: report an unknown frame rather than failing.
            lines.append(('??', '??:0'))
        if not lines:
            lines.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function, fix_filename(location))
                for (function, location) in lines]
Alexander Potapenko00424112012-01-26 17:06:50 +0000174
class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output. Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        """Fork a child on a pty and exec args in it.

        Args:
            args: command line (list); args[0] is looked up in PATH.
            close_stderr: when true, the child's stderr is redirected to
                /dev/null before the exec.
        """
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            try:
                if close_stderr:
                    dev_null = os.open('/dev/null', os.O_WRONLY)
                    os.dup2(dev_null, 2)
                os.execvp(args[0], args)
            finally:
                # Only reached when the exec (or the redirection) failed.
                # Exit right away so the child never continues running the
                # parent's code.
                os._exit(127)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its next output line."""
        self.w.write(line + "\n")
        self.w.flush()
        return self.readline()

    def readline(self):
        """Read one line from the child, stripped of trailing whitespace."""
        return self.r.readline().rstrip()
206
207
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that drives one atos process for a single binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # The architecture is guessed from the width of the address text.
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Start atos for self.binary behind an UnbufferedLineConverter."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns None for a binary other than the one this instance was
        created for; otherwise a one-element list.
        """
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip lines containing "got symbolicator for".
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if match:
            # Drop the parenthesised argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', match.group(1))
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        # Unrecognised reply: pass it through verbatim.
        return ['%s in %s' % (addr, atos_line)]
Kostya Serebryany1e172b42011-11-30 01:07:02 +0000240
Alexander Potapenko970a9b92012-07-31 13:51:26 +0000241
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order until one yields a result."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for candidate in self.symbolizer_list:
            # Falsy entries (e.g. None) in the list are skipped.
            if not candidate:
                continue
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add one more symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)
Alexander Potapenko8e398692012-08-02 13:59:23 +0000260
261
def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for binary, or None if unavailable.

    The symbol file name is binary plus the value of the BREAKPAD_SUFFIX
    environment variable; None is returned when the variable is unset/empty
    or that file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
269
270
def SystemSymbolizerFactory(system, addr, binary):
    """Create the platform's native symbolizer for binary.

    Args:
        system: OS name.
        addr: an address from the trace (DarwinSymbolizer uses it to guess
            the architecture).
        binary: path of the binary to symbolize.
    Returns:
        A DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux'
        and 'FreeBSD', and None for any other system.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    # FreeBSD is accepted by SymbolizationLoop, so it needs a fallback
    # symbolizer as well.
    if system in ('Linux', 'FreeBSD'):
        return Addr2LineSymbolizer(binary)
    return None
Alexander Potapenko897e89f2012-08-02 14:58:04 +0000276
277
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Read and index the symbol file filename."""
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # The file is closed as soon as it has been read.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the file, symbol and address tables from the record lines."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            record = fragments[0]
            if record == 'FILE':
                # File numbers are used as indices into self.files, so they
                # have to appear in order.
                if int(fragments[1]) != len(self.files):
                    raise ValueError('unexpected FILE number in %s: %s' %
                                     (self.filename, line.rstrip()))
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ['CFI', 'STACK']:
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line) covering addr, or None."""
        if addr in self.addresses:
            key = addr
        else:
            # Closest recorded address below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ['%s in %s %s:%d' % (
            addr, function_name, file_name, line_no)]
        # Only echoed in debug mode so the symbolized output stays clean.
        if DEBUG:
            print(result)
        return result
350
351
class SymbolizationLoop(object):
    """Reads a log line by line and replaces stack frames by symbolized ones."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        """Select the per-line handler for the current platform.

        Args:
            binary_name_filter: optional callable mapping the binary name
                found in the log to the name to symbolize.
            dsym_hint_producer: optional callable returning .dSYM hints for
                a binary.
        Raises:
            Exception: on a non-Windows system other than Linux, Darwin or
                FreeBSD.
        """
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes. Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Return the list of symbolized frames for one address."""
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if self.last_llvm_symbolizer and not use_new_symbolizer:
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            else:
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Format frames as output lines, numbering them consecutively.

        Falls back to the unmodified current line when there are no frames.
        """
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Process every line of the module-level logfile and print it."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize line if it is a stack frame, otherwise echo it."""
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line and original_binary != binary:
            # The filtered name gave nothing: retry with the name exactly as
            # it appeared in the log.
            symbolized_line = self.symbolize_address(
                addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)
Alexander Potapenko970a9b92012-07-31 13:51:26 +0000455
456
if __name__ == '__main__':
    # Command-line interface.
    parser = argparse.ArgumentParser(
        description='ASan symbolization script',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=('Example of use:\n'
                'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
                '-s "$HOME/SymbolFiles" < asan.log'))
    parser.add_argument('path_to_cut', nargs='*',
                        help='pattern to be cut from the result file path ')
    parser.add_argument('-d', '--demangle', action='store_true',
                        help='demangle function names')
    parser.add_argument('-s', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')
    parser.add_argument('-c', metavar='CROSS_COMPILE',
                        help='set prefix for binutils')
    parser.add_argument('-l', '--logfile', default=sys.stdin,
                        type=argparse.FileType('r'),
                        help='set log file name to parse, default is stdin')
    args = parser.parse_args()

    # Copy the parsed options into the module-level settings; an option that
    # was not given leaves its setting untouched.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        sysroot_path = args.s
        binary_name_filter = sysroot_path_filter
    if args.c:
        binutils_prefix = args.c
    logfile = args.logfile if args.logfile else sys.stdin

    # Symbolize the whole log.
    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()
490 loop.process_logfile()