#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import os
import platform
import re
import subprocess
import sys

# Process-wide LLVM symbolizer shared by the chains of all binaries; it is
# assigned in main() and stays None when no LLVM symbolizer is created.
llvm_symbolizer = None
# Cache of symbolizer chains, keyed by the path of the binary they serve.
symbolizers = {}
# NOTE(review): not referenced by any code visible in this file.
filetypes = {}
# NOTE(review): not referenced by any code visible in this file.
vmaddrs = {}
# When True, the helper command lines and the raw matched input lines are
# echoed to stdout.
DEBUG = False


def fix_filename(file_name):
    """Shorten a "file:line" location string for display.

    Each extra command-line argument (sys.argv[1:]) is used as a regular
    expression: the text matching ".*" followed by that expression is removed.
    After that, locations matching the ASan runtime source pattern are
    replaced by "_asan_rtl_" and "crtstuff.c:0" locations by "???:0".

    Args:
        file_name: location string as reported by a symbolizer tool.
    Returns:
        the rewritten location string.
    """
    shortened = file_name
    # User-supplied prefixes are applied first, in command-line order.
    for prefix_pattern in sys.argv[1:]:
        shortened = re.sub(".*" + prefix_pattern, "", shortened)
    builtin_rewrites = (
        (".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_"),
        (".*crtstuff.c:0", "???:0"),
    )
    for pattern, replacement in builtin_rewrites:
        shortened = re.sub(pattern, replacement, shortened)
    return shortened


class Symbolizer(object):
    """Interface shared by all symbolizer back ends."""

    def __init__(self):
        """Nothing to set up in the base class."""

    def symbolize(self, addr, binary, offset):
        """Describe the source location of a single instruction.

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers). The base implementation
            always returns None.
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to one long-lived llvm-symbolizer process."""

    def __init__(self, symbolizer_path):
        """Remember the tool path and try to start the tool.

        Args:
            symbolizer_path: path (or PATH-resolvable name) of llvm-symbolizer.
        """
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start llvm-symbolizer with piped stdin/stdout.

        Returns:
            the subprocess.Popen object, or None if the tool could not be
            started.
        """
        cmd = [self.symbolizer_path,
               # FIXME: Remove this when libObject is fixed.
               "--use-symbol-table=false",
               "--demangle=false",
               "--functions=true",
               "--inlining=true"]
        if DEBUG:
            print(' '.join(cmd))
        try:
            return subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                    stdout=subprocess.PIPE)
        except OSError:
            # The tool is missing or not executable. Report "no symbolizer"
            # instead of aborting, so symbolize() returns None and callers can
            # fall back to another symbolizer.
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Sends "<binary> <offset>" to the tool and reads (function, file)
        line pairs until an empty line or end of output.

        Returns:
            list with one "<addr> in <function> <file>" string per frame whose
            function and file are both known, or None if there is no such
            frame, the tool is unavailable, or talking to it failed.
        """
        if self.pipe is None:
            return None
        result = []
        try:
            self.pipe.stdin.write("%s %s\n" % (binary, offset))
            # Make sure the request reaches the tool before waiting for the
            # reply.
            self.pipe.stdin.flush()
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    break
                file_name = fix_filename(self.pipe.stdout.readline().rstrip())
                # Append only valid frames.
                if (not function_name.startswith("??") and
                        not file_name.startswith("??")):
                    result.append("%s in %s %s" % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure means "not symbolized by this tool".
            result = []
        return result or None


def LLVMSymbolizerFactory(system):
    """Create an LLVMSymbolizer on Linux; return None on any other system.

    The tool path is read from the LLVM_SYMBOLIZER_PATH environment variable.
    If that is unset or empty, "llvm-symbolizer" is assumed to be in PATH.

    Args:
        system: operating system name, e.g. "Linux".
    """
    if system != "Linux":
        return None
    # Assume llvm-symbolizer is in PATH when no explicit path is configured.
    symbolizer_path = os.getenv("LLVM_SYMBOLIZER_PATH") or "llvm-symbolizer"
    return LLVMSymbolizer(symbolizer_path)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an "addr2line -f -e <binary>" child process."""

    def __init__(self, binary):
        """Start addr2line for @binary and keep the process for later queries."""
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Launch addr2line with piped stdin/stdout; return the Popen object."""
        argv = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(argv))
        return subprocess.Popen(argv,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            None if @binary is not the binary this object was created for;
            otherwise a one-element list "<addr> in <function> <file>", with
            empty function and file names if talking to addr2line failed.
        """
        if binary != self.binary:
            return None
        try:
            self.pipe.stdin.write("%s\n" % offset)
            # addr2line answers with the function name, then the location.
            reply = (self.pipe.stdout.readline().rstrip(),
                     self.pipe.stdout.readline().rstrip())
        except Exception:
            reply = ("", "")
        function_name, file_name = reply
        return ["%s in %s %s" % (addr, function_name, fix_filename(file_name))]


class DarwinSymbolizer(Symbolizer):
    """Symbolizer that uses the otool and atos command-line tools."""

    def __init__(self, addr, binary):
        """Record the binary and guess its architecture.

        Args:
            addr: a virtual address string from the trace; only its length is
                used, to guess the architecture.
            binary: path to the executable/shared object to symbolize.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """Get the slide value to be added to the address.

        We're looking for the following piece in otool -l output:
            Load command 0
            cmd LC_SEGMENT
            cmdsize 736
            segname __TEXT
            vmaddr 0x00000000

        Returns:
            the vmaddr of the first __TEXT segment as an int, or 0 if none is
            found. The value is computed once and cached.
        """
        # Compare against None: 0 is a legitimate cached value and must not
        # cause otool to be run again for every address.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ["otool", "-l", self.binary]
        pipe = subprocess.Popen(cmdline,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE)
        # communicate() reads all output, closes the pipes and reaps otool.
        output = pipe.communicate()[0]
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                vmaddr = int(line.split(' ')[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send @offset, adjusted by the binary's slide, to the atos process."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))

    def open_atos(self):
        """Start a new atos process for the binary and store it in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            None if @binary is not the binary this object was created for;
            otherwise a one-element list: "<addr> in <function> <file:line>"
            for a well-formed atos answer, or "<addr> in <raw atos line>".
        """
        if self.binary != binary:
            return None
        self.open_atos()
        try:
            self.write_addr_to_pipe(offset)
            self.pipe.stdin.close()
            atos_line = self.pipe.stdout.readline().rstrip()
        finally:
            # A fresh atos process is started for every query, so release its
            # pipes and reap it here. Closing stdin first guarantees atos sees
            # end of input and exits even if the query could not be written.
            for stream in (self.pipe.stdin, self.pipe.stdout,
                           self.pipe.stderr):
                stream.close()
            self.pipe.wait()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if match:
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r"\(.*?\)", "", match.group(1))
            file_name = fix_filename(match.group(3))
            return ["%s in %s %s" % (addr, function_name, file_name)]
        return ["%s in %s" % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Tries an ordered list of symbolizers and returns the first result."""

    def __init__(self, symbolizer_list):
        """Store @symbolizer_list; falsy entries (e.g. None) are allowed."""
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            the first truthy result produced by a symbolizer in the chain, or
            None if every symbolizer fails.
        """
        # Falsy placeholders in the list are skipped.
        for candidate in filter(None, self.symbolizer_list):
            frames = candidate.symbolize(addr, binary, offset)
            if frames:
                return frames
        return None

    def append_symbolizer(self, symbolizer):
        """Add @symbolizer at the end of the chain (lowest priority)."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(addr, binary):
    """Create a BreakpadSymbolizer for @binary if its symbol file exists.

    The symbol file name is @binary followed by the value of the
    BREAKPAD_SUFFIX environment variable.

    Args:
        addr: unused.
        binary: path to the executable/shared object.
    Returns:
        a BreakpadSymbolizer, or None if BREAKPAD_SUFFIX is unset/empty or the
        symbol file does not exist.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if not suffix:
        return None
    symbol_file = binary + suffix
    if not os.access(symbol_file, os.F_OK):
        return None
    return BreakpadSymbolizer(symbol_file)


def SystemSymbolizerFactory(system, addr, binary):
    """Create the symbolizer based on the platform's native tool.

    Args:
        system: operating system name ('Linux' or 'Darwin').
        addr: virtual address string, passed on to DarwinSymbolizer.
        binary: path to the executable/shared object.
    Returns:
        an Addr2LineSymbolizer on Linux, a DarwinSymbolizer on Darwin, and
        None for any other system.
    """
    if system == 'Linux':
        return Addr2LineSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers queries from a parsed Breakpad symbol file."""

    def __init__(self, filename):
        """Load and parse the symbol file @filename.

        The first line must be a MODULE record; its fields from the third on
        provide the arch, the debug id and the binary name.
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Close the file as soon as it has been read.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # FILE records, indexed by file number.
        self.symbols = {}        # function/public address -> symbol name.
        self.address_list = []   # sorted start addresses of line records.
        self.addresses = {}      # start address -> line record tuple.
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following the MODULE line.

        FILE, PUBLIC and FUNC records are stored; CFI and STACK records are
        ignored; every other non-blank line is treated as a line record
        "<address> <size> <line> <file number>" belonging to the most recent
        FUNC record.
        """
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # Keep a name already recorded for this address.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(record, 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Find the line record covering @addr.

        Args:
            addr: offset in the binary, as an int.
        Returns:
            (symbol name, file name, line number), or None if no line record
            covers @addr.
        """
        if addr in self.addresses:
            key = addr
        else:
            # Closest record that starts below addr, if any.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            return None
        return self.symbols[sym_id], self.files[file_no], line_no

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize.

        Returns:
            None if @binary differs from the name in the MODULE record or the
            offset is not covered; otherwise a one-element list
            "<addr> in <function> <file>:<line>".
        """
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        result = ["%s in %s %s:%d" % (
            addr, function_name, file_name, line_no)]
        # Diagnostic output only; the caller prints the symbolized frame.
        if DEBUG:
            print(result)
        return result


def symbolize_address(system, addr, binary, offset):
    """Symbolize one stack frame, creating symbolizers for @binary on demand.

    Args:
        system: operating system name ('Linux' or 'Darwin').
        addr: virtual address of the instruction, as printed in the trace.
        binary: path to the executable/shared object containing it.
        offset: instruction offset in @binary, as a hex string.
    Returns:
        non-empty list of strings, one per (possibly inlined) frame.
    """
    # Use the chain of symbolizers:
    # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
    # (fall back to next symbolizer if the previous one fails).
    if binary not in symbolizers:
        symbolizers[binary] = ChainSymbolizer(
            [BreakpadSymbolizerFactory(addr, binary), llvm_symbolizer])
    chain = symbolizers[binary]
    result = chain.symbolize(addr, binary, offset)
    if result is None:
        # Initialize system symbolizer only if other symbolizers failed.
        chain.append_symbolizer(SystemSymbolizerFactory(system, addr, binary))
        result = chain.symbolize(addr, binary, offset)
    # The system symbolizer must produce some result.
    assert result
    return result

def main():
    """Symbolize the stack traces read from stdin and print them to stdout.

    Lines that look like "#0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)" are
    replaced by one output line per symbolized (possibly inlined) frame; all
    other lines are echoed without trailing whitespace. Frames are renumbered,
    restarting at 0 whenever an input frame numbered 0 is seen. On systems
    other than Linux and Darwin only a message is printed.
    """
    global llvm_symbolizer
    # platform.system() reports the same system names as os.uname()[0], and is
    # also available on platforms that have no os.uname().
    system = platform.system()
    llvm_symbolizer = LLVMSymbolizerFactory(system)
    if system not in ('Linux', 'Darwin'):
        print('Unknown system:  %s' % system)
        return
    # Example: #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    stack_trace_line = re.compile(
        r"^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)")
    frame_no = 0
    for line in sys.stdin:
        match = stack_trace_line.match(line)
        if not match:
            print(line.rstrip())
            continue
        if DEBUG:
            print(line)
        _prefix, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == "0":
            # Assume that frame #0 is the first frame of new stack trace.
            frame_no = 0
        symbolized_line = symbolize_address(system, addr, binary, offset)
        if not symbolized_line:
            print(line.rstrip())
            continue
        for symbolized_frame in symbolized_line:
            print(" #" + str(frame_no) + " " + symbolized_frame.rstrip())
            frame_no += 1


# Run the symbolizer only when this file is executed as a script.
if __name__ == '__main__':
    main()