#!/usr/bin/env python
# blob: b54d5ad8ae38fb54f765e282a31c5d6988fe699e
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import bisect
import os
import re
import subprocess
import sys

# Per-binary cache keyed by the binary path taken from a stack frame line.
# symbolize_addr2line() stores addr2line subprocess.Popen objects here;
# symbolize_line() stores Symbolizer instances.
pipes = {}
# Not referenced by any code visible in this file.
filetypes = {}
# Not referenced by any code visible in this file.
vmaddrs = {}
# When True, the Darwin code path prints extra diagnostics to stdout.
DEBUG = False


def fix_filename(file_name):
    """Shorten a "path:line" string reported by a symbolizer.

    Each command-line argument (sys.argv[1:]) is used as a regular
    expression fragment: everything up to and including its last match is
    cut from the front of file_name.  After that, a location inside an
    asan_*.cc file collapses to "_asan_rtl_" and "crtstuff.c:0" becomes
    "???:0".

    Returns the rewritten string.
    """
    for path_to_cut in sys.argv[1:]:
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    file_name = re.sub(".*asan_[a-z_]*.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
    return file_name


# TODO(glider): need some refactoring here
def symbolize_addr2line(line):
    """Symbolize one report line with addr2line and print the result.

    A stack frame line looks like
      #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    For such a line the offset is written to an addr2line process for the
    binary (one process per binary, cached in the module-level `pipes`), and
    "<frame prefix> in <function> <file:line>" is printed.  Any other line
    is printed back with its trailing whitespace stripped.

    Nothing is returned; all output goes to stdout.
    """
    match = re.match(
        r'^( *#([0-9]+) *0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
    if not match:
        sys.stdout.write(line.rstrip() + "\n")
        return
    # Group 2 is the frame number; it is not needed here.
    binary = match.group(3)
    addr = match.group(4)
    if binary not in pipes:
        # universal_newlines gives text-mode pipes on Python 3 as well.
        pipes[binary] = subprocess.Popen(
            ["addr2line", "-f", "-e", binary],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE,
            universal_newlines=True)
    p = pipes[binary]
    try:
        p.stdin.write(addr + "\n")
        # The pipe may be buffered; make sure addr2line sees the request
        # before we block on its answer.
        p.stdin.flush()
        function_name = p.stdout.readline().rstrip()
        file_name = p.stdout.readline().rstrip()
    except Exception:
        # Best effort: a broken addr2line pipe must not abort the report.
        function_name = ""
        file_name = ""
    file_name = fix_filename(file_name)
    sys.stdout.write(
        "%s in %s %s\n" % (match.group(1), function_name, file_name))


class Symbolizer(object):
    """Common base class of the symbolizers stored in `pipes` on Darwin."""

    def __init__(self):
        pass


class DarwinSymbolizer(Symbolizer):
    """Symbolizer that shells out to otool and atos for one binary."""

    def __init__(self, addr, binary):
        """Remember the binary and guess its architecture.

        addr:   "0x..." address string; only its length is inspected.
        binary: path handed to otool and atos.
        """
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        self.arch = "x86_64" if len(addr) > 10 else "i386"
        # __TEXT vmaddr of the binary; None until otool has been queried.
        self.vmaddr = None
        # The atos process started by the latest open_atos() call, if any.
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000

        The value is computed once and cached in self.vmaddr.  0 is
        returned when no such vmaddr line is found.
        """
        # Compare against None: a cached vmaddr of 0 is a valid result and
        # must not trigger another otool run.
        if self.vmaddr is not None:
            return self.vmaddr
        otool = subprocess.Popen(["otool", "-l", self.binary],
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
        # communicate() closes stdin, drains stdout and reaps the process.
        output = otool.communicate()[0]
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                vmaddr = int(line.split(' ')[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Write offset (a hex string) plus the binary's vmaddr to atos."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))
        self.pipe.stdin.flush()

    def open_atos(self):
        """Start a new atos process for the binary and keep it in self.pipe."""
        if DEBUG:
            sys.stdout.write(
                "atos -o %s -arch %s\n" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, prefix, addr, offset):
        """Return the frame line for offset, symbolized by atos.

        prefix and addr are copied verbatim to the front of the result.
        When the atos answer is not in the expected form, it is appended
        unparsed instead.  This method never returns None.
        """
        self.open_atos()
        self.write_addr_to_pipe(offset)
        # A new atos process is used per call.  communicate() closes its
        # stdin, collects the output and waits for it, so no process or
        # pipe is left behind.
        output = self.pipe.communicate()[0]
        first_line = output.splitlines()[:1]
        atos_line = first_line[0].rstrip() if first_line else ""
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            sys.stdout.write("atos_line:  %s\n" % atos_line)
        if match:
            # Drop the parenthesised argument list from the function name.
            function_name = re.sub(r"\(.*?\)", "", match.group(1))
            file_name = fix_filename(match.group(3))
            return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
        return "%s%s in %s" % (prefix, addr, atos_line)


# Chain two symbolizers so that the second one is called if the first fails.
class ChainSymbolizer(Symbolizer):
    """Tries symbolizer1 first and falls back to symbolizer2 on None."""

    def __init__(self, symbolizer1, symbolizer2):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer1 = symbolizer1
        self.symbolizer2 = symbolizer2

    def symbolize(self, prefix, addr, offset):
        """Return symbolizer1's result, or symbolizer2's if that is None."""
        result = self.symbolizer1.symbolize(prefix, addr, offset)
        if result is None:
            result = self.symbolizer2.symbolize(prefix, addr, offset)
        return result


def BreakpadSymbolizerFactory(addr, binary):
    """Return a BreakpadSymbolizer for the binary's symbol file, or None.

    The symbol file name is binary + $BREAKPAD_SUFFIX.  None is returned
    when the BREAKPAD_SUFFIX environment variable is unset or empty, or when
    that file does not exist.

    addr is not used; the parameter is kept so existing callers keep
    working.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            # BreakpadSymbolizer.__init__ takes the file name only.
            return BreakpadSymbolizer(filename)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizes offsets from a Breakpad text symbol file."""

    def __init__(self, filename):
        """Load and index the symbol file at filename.

        The first line must be the MODULE record; the remaining lines are
        passed to parse_lines().
        """
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        # Source file names, indexed by the FILE record number.
        self.files = []
        # Symbol start address -> symbol name.
        self.symbols = {}
        # Sorted start addresses of all line records.
        self.address_list = []
        # Line record start address -> (symbol address, size, line, file no).
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill files, symbols, addresses and address_list from lines."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to record.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                # PUBLIC <address> <parameter_size> <name>
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK', 'INFO', 'INLINE',
                            'INLINE_ORIGIN'):
                # Record kinds that carry nothing we use for symbolization.
                pass
            elif record == 'FUNC':
                # FUNC <address> <size> <parameter_size> <name>
                cur_function_addr = int(fragments[1], 16)
                # Remember the name so line records of this function can be
                # resolved; a name already recorded for the address wins.
                self.symbols.setdefault(cur_function_addr,
                                        ' '.join(fragments[4:]))
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, file name, line number) for addr, or None.

        None is returned when no line record covers addr or when the name
        of the enclosing symbol is unknown.
        """
        if addr in self.addresses:
            key = addr
        else:
            # Closest line record that starts below addr.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        if addr >= key + size:
            return None
        symbol = self.symbols.get(sym_id)
        if symbol is None:
            return None
        return symbol, self.files[file_no], line_no

    def symbolize(self, prefix, addr, offset):
        """Return the symbolized frame line for offset, or None if unknown.

        prefix and addr are copied verbatim to the front of the result;
        offset is a hex string looked up in the symbol file.
        """
        res = self.get_sym_file_line(int(offset, 16))
        if not res:
            return None
        function_name, file_name, line_no = res
        return "%s%s in %s %s:%d" % (
            prefix, addr, function_name, file_name, line_no)


def symbolize_line(line):
    """Return line with its stack frame symbolized (Darwin code path).

    A stack frame line looks like
      #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    The symbolizer for each binary is created on first use and cached in
    the module-level `pipes`: a BreakpadSymbolizer when
    BreakpadSymbolizerFactory() provides one, otherwise a DarwinSymbolizer.
    If the cached symbolizer returns None, it is replaced by a
    ChainSymbolizer that falls back to a DarwinSymbolizer.

    A line that is not a stack frame is returned unchanged.
    """
    match = re.match(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
    if not match:
        return line
    if DEBUG:
        sys.stdout.write(line + "\n")
    prefix = match.group(1)
    # Group 2 is the frame number; it is not needed here.
    addr = match.group(3)
    binary = match.group(4)
    offset = match.group(5)
    if binary not in pipes:
        pipes[binary] = (BreakpadSymbolizerFactory(addr, binary) or
                         DarwinSymbolizer(addr, binary))
    result = pipes[binary].symbolize(prefix, addr, offset)
    if result is None:
        pipes[binary] = ChainSymbolizer(pipes[binary],
                                        DarwinSymbolizer(addr, binary))
        result = pipes[binary].symbolize(prefix, addr, offset)
    # Return the result already computed rather than symbolizing the frame
    # a second time.
    return result


def main():
    """Symbolize a report read from stdin and print it to stdout.

    On Linux each line goes through symbolize_addr2line(), on Darwin
    through symbolize_line().  On any other system a single "Unknown
    system" message is printed and stdin is not read.
    """
    system = os.uname()[0]
    if system == 'Linux':
        for line in sys.stdin:
            # symbolize_addr2line() prints its own result.
            symbolize_addr2line(line)
    elif system == 'Darwin':
        for line in sys.stdin:
            sys.stdout.write(symbolize_line(line).rstrip() + "\n")
    else:
        sys.stdout.write('Unknown system:  %s\n' % system)


if __name__ == '__main__':
    main()