blob: 5fe87e049e259b651b35eed3e2f72380f5d33b6c [file] [log] [blame]
#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import os
import platform
import re
import subprocess
import sys
15
# Cache of symbolizer objects keyed by the binary path found in a stack
# frame; symbolize_line() fills it in lazily.
symbolizers = {}
# NOTE(review): filetypes and vmaddrs are not referenced by the code visible
# in this file; presumably legacy caches -- confirm before removing.
filetypes = {}
vmaddrs = {}
# Set to True to echo matched input lines, the external commands being
# started, and the raw atos output.
DEBUG = False
Kostya Serebryany019b76f2011-11-30 01:07:02 +000020
Alexander Potapenko8aae9552012-07-31 13:51:26 +000021
def fix_filename(file_name):
    """Shorten a "file:line" location for display.

    Every command-line argument (sys.argv[1:]) is used as a regular
    expression fragment: everything up to and including its last match is
    removed from file_name.  Locations inside asan_*.cc files are collapsed
    to "_asan_rtl_", and "crtstuff.c:0" is replaced with "???:0".

    Args:
        file_name: location string, e.g. "/src/project/foo.cc:12".

    Returns:
        The shortened location string.
    """
    for path_to_cut in sys.argv[1:]:
        # path_to_cut is deliberately not escaped: it is spliced into the
        # pattern as given on the command line.
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    # The dots before the extensions are literal, so they must be escaped.
    file_name = re.sub(r".*asan_[a-z_]*\.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
    return file_name
28
29
class Symbolizer(object):
    """Common base class of all symbolizers.

    Subclasses implement symbolize(prefix, addr, offset).
    """

    def __init__(self):
        pass

    def symbolize(self, prefix, addr, offset):
        """Turn one stack frame into a human-readable line.

        Args:
            prefix: leading part of the frame line (indentation and "#N ").
            addr: absolute address as a "0x..." string.
            offset: offset inside the binary as a "0x..." string.

        Returns:
            The symbolized frame as a string, or None if this symbolizer
            cannot resolve the offset.
        """
        raise NotImplementedError(
            "%s must implement symbolize()" % type(self).__name__)
33
34
class LinuxSymbolizer(Symbolizer):
    """Symbolizes offsets of one binary through an addr2line subprocess.

    The addr2line process is started once in the constructor and is then fed
    one offset per symbolize() call.
    """

    def __init__(self, binary):
        super(LinuxSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start "addr2line -f -e <binary>" and return the Popen object."""
        cmd = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Text mode keeps both pipe ends string-based, so the same code works
        # whether the interpreter defaults to byte or text pipes.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, prefix, addr, offset):
        """Return "<prefix><addr> in <function> <file:line>" for offset.

        This is best effort: if talking to addr2line fails, the function and
        file parts are left empty instead of raising.
        """
        try:
            self.pipe.stdin.write("%s\n" % offset)
            # Without an explicit flush a buffered pipe would keep the
            # request in memory and the readline() below would never return.
            self.pipe.stdin.flush()
            # With -f addr2line answers with two lines: function, file:line.
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            function_name = ""
            file_name = ""
        file_name = fix_filename(file_name)
        return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
Alexander Potapenko02a71622012-01-26 17:06:50 +000056
57
class DarwinSymbolizer(Symbolizer):
    """Symbolizes offsets of one binary with the atos tool.

    A fresh atos process is started for every symbolize() call.  The __TEXT
    vmaddr of the binary is read once with otool and cached.
    """

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000
        """
        # Compare against None: a vmaddr of 0 is a valid cached value.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ["otool", "-l", self.binary]
        pipe = subprocess.Popen(cmdline,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() closes the pipes and waits for otool to exit.
        output, _ = pipe.communicate()
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                sv = line.split(' ')
                vmaddr = int(sv[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Send offset plus the binary's vmaddr to the current atos process."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start "atos -o <binary> -arch <arch>" and store it in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def _close_atos(self):
        """Close the pipes of the current atos process and reap it."""
        for stream in (self.pipe.stdin, self.pipe.stdout, self.pipe.stderr):
            try:
                stream.close()
            except (IOError, OSError):
                # The process may already be gone; there is nothing to flush.
                pass
        self.pipe.wait()

    def symbolize(self, prefix, addr, offset):
        """Return the symbolized frame line for offset.

        If the atos answer has the expected shape the result is
        "<prefix><addr> in <function> <file:line>"; otherwise the raw atos
        answer is appended after "in".
        """
        self.open_atos()
        try:
            self.write_addr_to_pipe(offset)
            self.pipe.stdin.close()
            atos_line = self.pipe.stdout.readline().rstrip()
        finally:
            # One process is started per call, so release it here instead of
            # leaving its pipes open and the child unreaped.
            self._close_atos()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the argument list from the function name.
            function_name = re.sub(r"\(.*?\)", "", function_name)
            file_name = fix_filename(match.group(3))
            return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
        else:
            return "%s%s in %s" % (prefix, addr, atos_line)
Kostya Serebryany019b76f2011-11-30 01:07:02 +0000126
Alexander Potapenko8aae9552012-07-31 13:51:26 +0000127
# Chain two symbolizers so that the second one is called if the first fails.
class ChainSymbolizer(Symbolizer):
    """Tries symbolizer1 first and falls back to symbolizer2.

    "Fails" means that symbolizer1.symbolize() returned None.
    """

    def __init__(self, symbolizer1, symbolizer2):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer1 = symbolizer1
        self.symbolizer2 = symbolizer2

    def symbolize(self, prefix, addr, offset):
        """Return the first non-None result of the two symbolizers.

        Returns None if symbolizer1 fails and symbolizer2 is missing or
        fails as well.
        """
        result = self.symbolizer1.symbolize(prefix, addr, offset)
        # symbolizer2 may be None when no fallback could be created.
        if result is None and self.symbolizer2 is not None:
            result = self.symbolizer2.symbolize(prefix, addr, offset)
        return result
139
140
def BreakpadSymbolizerFactory(addr, binary):
    """Create a BreakpadSymbolizer for binary if a symbol file is available.

    The symbol file path is binary + $BREAKPAD_SUFFIX.

    Args:
        addr: unused; kept so the signature stays the same for callers.
        binary: path of the binary taken from the stack frame.

    Returns:
        A BreakpadSymbolizer, or None if BREAKPAD_SUFFIX is unset or empty,
        or the symbol file is not a readable regular file.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if suffix:
        filename = binary + suffix
        # Mere existence is not enough: a directory or an unreadable file
        # would make the BreakpadSymbolizer constructor fail on open.
        if os.path.isfile(filename) and os.access(filename, os.R_OK):
            return BreakpadSymbolizer(filename)
    return None
148
149
def SystemSymbolizerFactory(system, addr, binary):
    """Create the symbolizer that uses the platform's native tools.

    Args:
        system: system name, 'Darwin' or 'Linux'.
        addr: frame address string; passed to DarwinSymbolizer.
        binary: path of the binary to symbolize.

    Returns:
        A DarwinSymbolizer or LinuxSymbolizer, or None for any other system.
    """
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    if system == 'Linux':
        return LinuxSymbolizer(binary)
    return None
155
156
class BreakpadSymbolizer(Symbolizer):
    """Symbolizes offsets using a Breakpad text symbol file.

    Attributes:
        files: source file names, indexed by their FILE record number.
        symbols: maps a function/public symbol address to its name.
        address_list: sorted start addresses of all line records.
        addresses: maps a line record start address to a tuple of
            (function address, size, line number, file number).
    """

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following MODULE."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK', 'INFO'):
                # Records that carry no function or line information.
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A PUBLIC name recorded earlier for this address wins.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering addr.

        Returns:
            A (symbol, filename, line_no) tuple, or None if no line record
            covers addr.
        """
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            return None
        # Resolve names only after the range check has succeeded.
        return self.symbols[sym_id], self.files[file_no], line_no

    def symbolize(self, prefix, addr, offset):
        """Return "<prefix><addr> in <function> <file>:<line>" or None.

        The result is only returned, not printed; printing is left to the
        caller.
        """
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            return "%s%s in %s %s:%d" % (
                prefix, addr, function_name, file_name, line_no)
        else:
            return None
224
225
def symbolize_line(system, line):
    """Symbolize one line of a report if it is a stack frame.

    A frame line looks like:
        #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)

    The symbolizer for each binary is created on first use and cached in the
    module-level symbolizers dict.  A Breakpad symbolizer is preferred when
    available; if the cached symbolizer cannot resolve a frame, it is wrapped
    in a ChainSymbolizer that falls back to the system symbolizer.

    Args:
        system: system name, passed to SystemSymbolizerFactory.
        line: one input line, including its line terminator.

    Returns:
        The symbolized frame, or line unchanged if it is not a frame line or
        could not be symbolized.
    """
    match = re.match(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
    if not match:
        return line
    if DEBUG:
        print(line)
    prefix = match.group(1)
    # match.group(2) is the frame number; it is already part of prefix.
    addr = match.group(3)
    binary = match.group(4)
    offset = match.group(5)
    if binary not in symbolizers:
        p = BreakpadSymbolizerFactory(addr, binary)
        if p:
            symbolizers[binary] = p
        else:
            symbolizers[binary] = SystemSymbolizerFactory(system, addr, binary)
    # Symbolize once and reuse the answer; a second call would repeat the
    # subprocess round trip for nothing.
    result = symbolizers[binary].symbolize(prefix, addr, offset)
    if result is None:
        symbolizers[binary] = ChainSymbolizer(
            symbolizers[binary],
            SystemSymbolizerFactory(system, addr, binary))
        result = symbolizers[binary].symbolize(prefix, addr, offset)
    if result is None:
        # Nothing could resolve the frame: keep the input line as is.
        return line
    return result
Alexander Potapenko879b1ff2012-08-02 14:58:04 +0000251
Alexander Potapenkobe84ac82012-08-02 13:59:23 +0000252
def main():
    """Filter stdin to stdout, symbolizing every stack frame line.

    Only Linux and Darwin are supported; on any other system a message is
    printed and nothing is read from stdin.
    """
    # platform.system() is available on every platform, so an unsupported
    # system reaches the message below instead of failing on a missing
    # os.uname().
    system = platform.system()
    if system not in ('Linux', 'Darwin'):
        print('Unknown system:  %s' % system)
        return
    for line in sys.stdin:
        line = symbolize_line(system, line)
        print(line.rstrip())


if __name__ == '__main__':
    main()