blob: 7b30bb55914eee5d1c08015a1cd0a08cd7f58487 [file] [log] [blame]
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
Alexander Potapenkobe84ac82012-08-02 13:59:23 +000010import bisect
Kostya Serebryany019b76f2011-11-30 01:07:02 +000011import os
12import re
Kostya Serebryany019b76f2011-11-30 01:07:02 +000013import subprocess
Alexander Potapenko3f6a5c12012-09-26 13:16:42 +000014import sys
Kostya Serebryany019b76f2011-11-30 01:07:02 +000015
# Module-level state.
# llvm_symbolizer, filetypes and vmaddrs are not referenced by the code
# visible in this file; they are kept for backward compatibility.
llvm_symbolizer = None
# Cache of symbolizer chains, keyed by binary path (filled lazily by
# SymbolizationLoop.symbolize_address).
symbolizers = {}
filetypes = {}
vmaddrs = {}
# When True, the symbolizers echo their command lines and raw input/output.
DEBUG = False
Kostya Serebryany019b76f2011-11-30 01:07:02 +000021
Alexander Potapenko8aae9552012-07-31 13:51:26 +000022
# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Normalize a source location string reported by a symbolizer.

    Args:
      file_name: location text such as '/path/to/file.cc:12'.
    Returns:
      the location with the following rewrites applied, in order:
        * for every command-line argument (sys.argv[1:]), used as a regular
          expression fragment, everything up to and including its match is
          removed;
        * a location inside an asan_*.cc file (including its line number) is
          replaced with '_asan_rtl_';
        * 'crtstuff.c:0' (with any prefix) is replaced with '???:0'.
    """
    for path_to_cut in sys.argv[1:]:
        file_name = re.sub('.*' + path_to_cut, '', file_name)
    # The dots are escaped so that only a literal '.cc:' / '.c:' matches.
    file_name = re.sub(r'.*asan_[a-z_]*\.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub(r'.*crtstuff\.c:0', '???:0', file_name)
    return file_name
30
31
class Symbolizer(object):
    """Base class for all symbolizers; symbolizes nothing by itself."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
          addr: virtual address of an instruction.
          binary: path to executable/shared object containing this
              instruction.
          offset: instruction offset in the @binary.
        Returns:
          list of strings (one string for each inlined frame) describing
          the code locations for this instruction (that is, function name,
          file name, line and column numbers). The base implementation
          always returns None.
        """
        return None
Alexander Potapenko879b1ff2012-08-02 14:58:04 +000050
51
class LLVMSymbolizer(Symbolizer):
    """Symbolizer backed by a single, reused llvm-symbolizer subprocess.

    Each request is written to the child's stdin as a '<binary> <offset>'
    line; the answer is read from its stdout as (function name, file
    location) line pairs terminated by an empty line.
    """

    def __init__(self, symbolizer_path):
        """Store the tool location and try to start the tool.

        Args:
          symbolizer_path: path or bare command name of llvm-symbolizer.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        # None when the tool could not be started; symbolize() then returns
        # None so that the next symbolizer in a chain can take over.
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start the llvm-symbolizer subprocess.

        Returns:
          the subprocess.Popen object, or None if the tool cannot be started.
        """
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=false',
               '--functions=true',
               '--inlining=true']
        if DEBUG:
            print(' '.join(cmd))
        # Let Popen locate the executable: unlike an os.path.exists() check,
        # this also works for a bare command name that is found via PATH.
        try:
            # universal_newlines=True gives text-mode pipes on Python 3.
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE,
                                    universal_newlines=True)
        except OSError:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          list of '<addr> in <function> <file>' strings, one per frame whose
          function and file are both known, or None if there is no such frame
          or the subprocess could not be used.
        """
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '%s %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # Make sure the request reaches the child before we block on
            # reading its answer.
            self.pipe.stdin.flush()
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') and
                        not file_name.startswith('??')):
                    # Append only valid frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Deliberately best-effort: any failure means "not symbolized",
            # which lets the caller fall back to another symbolizer.
            result = []
        if not result:
            result = None
        return result
97
98
def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer.

    Args:
      system: operating system name; currently unused.
    Returns:
      an LLVMSymbolizer for the tool named by the LLVM_SYMBOLIZER_PATH
      environment variable, or for 'llvm-symbolizer' if that variable is
      unset or empty.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        # Assume llvm-symbolizer is in PATH.
        symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path)
Alexey Samsonov52565d52012-09-19 08:49:53 +0000105
106
class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an 'addr2line -f -e <binary>' subprocess."""

    def __init__(self, binary):
        """Start addr2line for the given binary.

        Args:
          binary: path of the binary this symbolizer is responsible for.
        """
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start the addr2line subprocess and return its Popen object."""
        cmd = ['addr2line', '-f', '-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # universal_newlines=True gives text-mode pipes on Python 3.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None if @binary is not the binary this object was created for,
          otherwise a one-element list '<addr> in <function> <file>'. If
          talking to addr2line fails, function and file are left empty.
        """
        if self.binary != binary:
            return None
        try:
            self.pipe.stdin.write('%s\n' % offset)
            # Make sure the request reaches the child before reading.
            self.pipe.stdin.flush()
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Best-effort: still report the frame, just without names.
            function_name = ''
            file_name = ''
        file_name = fix_filename(file_name)
        return ['%s in %s %s' % (addr, function_name, file_name)]
Alexander Potapenko02a71622012-01-26 17:06:50 +0000133
134
class DarwinSymbolizer(Symbolizer):
    """Symbolizer that runs 'atos' once for every address."""

    def __init__(self, addr, binary):
        """Remember the binary and guess the architecture.

        Args:
          addr: a sample address string; its length is used to guess the
              architecture.
          binary: path of the binary this symbolizer is responsible for.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
        if len(addr) > 10:
            self.arch = 'x86_64'
        else:
            self.arch = 'i386'
        self.vmaddr = None
        self.pipe = None

    def write_addr_to_pipe(self, offset):
        """Send @offset (a hex string) to atos as a normalized '0x...' line."""
        self.pipe.stdin.write('0x%x\n' % int(offset, 16))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start a fresh atos subprocess and store it in self.pipe."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        # universal_newlines=True gives text-mode pipes on Python 3.
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def _close_atos(self):
        """Close all pipes of the current atos process and reap it."""
        for stream in (self.pipe.stdin, self.pipe.stdout, self.pipe.stderr):
            try:
                stream.close()
            except (IOError, OSError):
                # The child may already be gone; nothing left to release.
                pass
        self.pipe.wait()
        self.pipe = None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None if @binary is not the binary this object was created for,
          otherwise a one-element list: '<addr> in <function> <file>' when
          the atos answer is well-formed, else '<addr> in <raw atos answer>'.
        """
        if self.binary != binary:
            return None
        self.open_atos()
        try:
            self.write_addr_to_pipe(offset)
            self.pipe.stdin.close()
            atos_line = self.pipe.stdout.readline().rstrip()
        finally:
            # A new process is started per call, so release this one now
            # instead of leaking its descriptors and leaving a zombie.
            self._close_atos()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  %s' % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]
Kostya Serebryany019b76f2011-11-30 01:07:02 +0000179
Alexander Potapenko8aae9552012-07-31 13:51:26 +0000180
# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries a list of symbolizers in order and returns the first result."""

    def __init__(self, symbolizer_list):
        """Store the chain.

        Args:
          symbolizer_list: list of symbolizers to try, in order. Falsy
              entries (e.g. None) are allowed and skipped. The list object
              itself is kept and later extended by append_symbolizer().
        """
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          the first non-empty result produced by a symbolizer in the chain,
          or None if every symbolizer fails.
        """
        for symbolizer in self.symbolizer_list:
            if symbolizer:
                result = symbolizer.symbolize(addr, binary, offset)
                if result:
                    return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add @symbolizer to the end of the chain."""
        self.symbolizer_list.append(symbolizer)
Alexander Potapenkobe84ac82012-08-02 13:59:23 +0000199
200
def BreakpadSymbolizerFactory(binary):
    """Create a BreakpadSymbolizer for @binary if a symbol file exists.

    The symbol file name is @binary followed by the value of the
    BREAKPAD_SUFFIX environment variable.

    Args:
      binary: path of the binary to symbolize.
    Returns:
      a BreakpadSymbolizer, or None if BREAKPAD_SUFFIX is unset or empty or
      the symbol file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None
208
209
def SystemSymbolizerFactory(system, addr, binary):
    """Create the symbolizer based on the platform's own tool.

    Args:
      system: operating system name, 'Darwin' or 'Linux'.
      addr: address string, passed on to DarwinSymbolizer.
      binary: path of the binary to symbolize.
    Returns:
      a DarwinSymbolizer on 'Darwin', an Addr2LineSymbolizer on 'Linux',
      and None for any other system.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return Addr2LineSymbolizer(binary)
    # Unknown system: make the "no symbolizer" result explicit.
    return None
Alexander Potapenko879b1ff2012-08-02 14:58:04 +0000215
216
class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file.

        Args:
          filename: path of the Breakpad symbol file. Its first line must be
              the MODULE record.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        # Source file names, indexed by the number given in FILE records.
        self.files = []
        # Function/public symbol start address -> symbol name.
        self.symbols = {}
        # Sorted start addresses of all line records.
        self.address_list = []
        # Line record start address ->
        #     (function address, size, line number, file number).
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Index the records that follow the MODULE line.

        Args:
          lines: iterable of record lines from the symbol file.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Tolerate blank lines instead of failing on fragments[0].
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK', 'INFO']:
                # Records that carry no function/line information. INFO is
                # skipped so it is not mistaken for a line record below.
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name recorded earlier for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering @addr.

        Args:
          addr: integer offset inside the module.
        Returns:
          (symbol name, file name, line number), or None if no line record
          covers @addr.
        """
        key = None
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts before addr, if any.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            else:
                key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
          None if @binary differs from the module name in the symbol file or
          @offset is not covered, otherwise a one-element list
          '<addr> in <function> <file>:<line>'.
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            # The frame is returned only; printing it here as well would
            # duplicate it (as a list repr) in the symbolized output.
            return ['%s in %s %s:%d' % (
                addr, function_name, file_name, line_no)]
        else:
            return None
289
290
class SymbolizationLoop(object):
    """Copies stdin to stdout, replacing stack trace lines with symbolized
    frames."""

    def __init__(self, binary_name_filter=None):
        """Set up the loop for the current operating system.

        Args:
          binary_name_filter: optional callable mapping a binary name found
              in a stack trace line to the name used for symbolization.
        Raises:
          Exception: if the system is neither 'Linux' nor 'Darwin'.
        """
        # Used by clients who may want to supply a different binary name.
        # E.g. in Chrome several binaries may share a single .dSYM.
        self.binary_name_filter = binary_name_filter
        # Defined up front so print_symbolized_lines() is usable even before
        # process_stdin() has run.
        self.frame_no = 0
        self.current_line = ''
        self.system = os.uname()[0]
        if self.system in ['Linux', 'Darwin']:
            self.llvm_symbolizer = LLVMSymbolizerFactory(self.system)
        else:
            raise Exception('Unknown system')

    def symbolize_address(self, addr, binary, offset):
        """Symbolize one address using the cached chain for @binary.

        Args:
          addr: virtual address string from the stack trace line.
          binary: path of the binary containing the address.
          offset: offset of the address inside @binary (hex string).
        Returns:
          list of symbolized frame strings.
        """
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary), self.llvm_symbolizer])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def print_symbolized_lines(self, symbolized_lines):
        """Print the frames for the current input line.

        Args:
          symbolized_lines: list of frame strings, or a falsy value, in which
              case the current input line is printed unchanged.
        """
        if not symbolized_lines:
            print(self.current_line)
        else:
            for symbolized_frame in symbolized_lines:
                # NOTE(review): the prefix before '#' may have been wider
                # before whitespace in this file was collapsed -- confirm.
                print(' #' + str(self.frame_no) + ' ' +
                      symbolized_frame.rstrip())
                self.frame_no += 1

    def process_stdin(self):
        """Read stdin line by line and print the symbolized report."""
        self.frame_no = 0
        # Matches e.g.: #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
        # Compiled once instead of being looked up for every input line.
        stack_trace_line_format = re.compile(
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        for line in sys.stdin:
            self.current_line = line.rstrip()
            match = stack_trace_line_format.match(line)
            if not match:
                print(self.current_line)
                continue
            if DEBUG:
                print(line)
            _, frameno_str, addr, binary, offset = match.groups()
            if frameno_str == '0':
                # Assume that frame #0 is the first frame of new stack trace.
                self.frame_no = 0
            original_binary = binary
            if self.binary_name_filter:
                binary = self.binary_name_filter(binary)
            symbolized_line = self.symbolize_address(addr, binary, offset)
            if not symbolized_line and original_binary != binary:
                # The filtered name did not work; retry with the name that
                # actually appeared in the stack trace.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
            self.print_symbolized_lines(symbolized_line)
351 self.print_symbolized_lines(symbolized_line)
Alexander Potapenko8aae9552012-07-31 13:51:26 +0000352
353
# Script entry point: symbolize the report read from stdin.
if __name__ == '__main__':
    loop = SymbolizationLoop()
    loop.process_stdin()