#!/usr/bin/python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""
21
Elliott Hughes08365932014-06-13 18:12:25 -070022import glob
Iliyan Malchev4929d6a2011-08-04 17:44:40 -070023import os
Yang Nie4b2a1a2014-11-06 17:42:33 -080024import platform
Iliyan Malchev4929d6a2011-08-04 17:44:40 -070025import re
26import subprocess
Elliott Hughesc3c86192014-08-29 13:49:57 -070027import unittest
Iliyan Malchev4929d6a2011-08-04 17:44:40 -070028
# Root of the Android source tree, taken from the environment. Use .get() so
# that an unset variable (not just an empty one) falls back to the current
# directory instead of raising KeyError at import time.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
32
def FindSymbolsDir():
  """Ask the build system where the unstripped binaries live.

  Runs make from ANDROID_BUILD_TOP to dump TARGET_OUT_UNSTRIPPED and joins
  whatever it prints onto ANDROID_BUILD_TOP.

  Returns:
    os.path.join(ANDROID_BUILD_TOP, <stripped make output>). The exit status
    of make is not checked, so empty output yields ANDROID_BUILD_TOP joined
    with an empty string.
  """
  cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
         "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
         "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
  # Pass cwd= instead of calling os.chdir() so this process's own working
  # directory is never touched. universal_newlines makes the output text on
  # both Python 2 and Python 3.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                           cwd=ANDROID_BUILD_TOP, universal_newlines=True)
  # communicate() drains and closes the pipe and reaps the child.
  output = child.communicate()[0]
  return os.path.join(ANDROID_BUILD_TOP, output.strip())
44
# Directory prefix prepended to library paths to find their unstripped
# copies. Computed once, at import time, by FindSymbolsDir().
SYMBOLS_DIR = FindSymbolsDir()

# Architecture name used to select the toolchain and to decide whether
# StripPC() clears bit 0. The tests in this file reassign it.
ARCH = "arm"


# These are private. Do not access them from other modules.
# They hold the toolchain directory last returned by FindToolchain() and the
# ARCH value it was found for.
_CACHED_TOOLCHAIN = None
_CACHED_TOOLCHAIN_ARCH = None
53
Ben Chengb42dad02013-04-25 15:14:04 -070054
def ToolPath(tool, toolchain=None):
  """Return a fully-qualified path to the specified tool.

  Args:
    tool: unprefixed tool name, e.g. "addr2line" or "objdump"
    toolchain: directory to search; when empty, FindToolchain() is used

  Returns:
    The first path in the toolchain directory matching "*-<tool>".

  Raises:
    IndexError: if nothing in the toolchain directory matches.
  """
  if not toolchain:
    toolchain = FindToolchain()
  matches = glob.glob(os.path.join(toolchain, "*-" + tool))
  if not matches:
    # Same exception type the bare [0] lookup raised, but with a message that
    # says which tool was missing and where we looked.
    raise IndexError("No %s found in toolchain %s" % (tool, toolchain))
  return matches[0]
Iliyan Malchev4929d6a2011-08-04 17:44:40 -070060
Elliott Hughesc3c86192014-08-29 13:49:57 -070061
def FindToolchain():
  """Returns the toolchain matching ARCH.

  The result is cached in module globals and reused until ARCH changes.

  Returns:
    The toolchain "bin/" directory under ANDROID_BUILD_TOP/prebuilts/gcc;
    when several match, the last one in sorted order.

  Raises:
    Exception: if no toolchain directory matches ARCH, or the chosen
      directory has no addr2line.
  """
  global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
  if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
    return _CACHED_TOOLCHAIN

  # We use slightly different names from GCC, and there's only one toolchain
  # for x86/x86_64. Note that these are the names of the top-level directory
  # rather than the _different_ names used lower down the directory hierarchy!
  gcc_dir = {
      "arm64": "aarch64",
      "mips64": "mips",
      "x86_64": "x86",
  }.get(ARCH, ARCH)

  os_name = platform.system().lower()

  available_toolchains = glob.glob(
      "%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/"
      % (ANDROID_BUILD_TOP, os_name, gcc_dir))
  if not available_toolchains:
    raise Exception("Could not find tool chain for %s" % (ARCH))

  toolchain = sorted(available_toolchains)[-1]

  # Glob here rather than calling ToolPath(): ToolPath() raises IndexError
  # when nothing matches, which would pre-empt the clearer error below.
  addr2line = glob.glob(os.path.join(toolchain, "*-addr2line"))
  if not addr2line or not os.path.exists(addr2line[0]):
    raise Exception("No addr2line for %s" % (toolchain))

  _CACHED_TOOLCHAIN = toolchain
  _CACHED_TOOLCHAIN_ARCH = ARCH
  # Single-argument print(...) emits the same text on Python 2 and Python 3.
  print("Using %s toolchain from: %s"
        % (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN))
  return _CACHED_TOOLCHAIN
94
Iliyan Malchev4929d6a2011-08-04 17:44:40 -070095
def SymbolInformation(lib, addr):
  """Look up symbol information about a single address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of (source_symbol, source_location, object_symbol_with_offset)
    tuples.

    When the function has been inlined the list may hold several entries,
    with the most deeply nested inlined location first, so callers usually
    display source_location and object_symbol_with_offset from the last
    entry.

    The list is never empty: if nothing could be looked up it is
    [(None, None, None)].
  """
  by_addr = SymbolInformationForSet(lib, {addr})
  if by_addr:
    entries = by_addr.get(addr)
    if entries:
      return entries
  return [(None, None, None)]
Iliyan Malchev4929d6a2011-08-04 17:44:40 -0700117
118
def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses in one library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal address strings

  Returns:
    None if lib is empty, or if either the addr2line or the objdump lookup
    returned nothing. Otherwise a dictionary
    {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
    with one non-empty list per requested address.

    When the function has been inlined a list may hold several entries, with
    the most deeply nested inlined location first; every entry for an
    address carries the same object_symbol_with_offset. Callers usually
    display source_location and object_symbol_with_offset from the last
    entry.
  """
  if not lib:
    return None

  source_by_addr = CallAddr2LineForSet(lib, unique_addrs)
  if not source_by_addr:
    return None

  function_by_addr = CallObjdumpForSet(lib, unique_addrs)
  if not function_by_addr:
    return None

  result = {}
  for addr in unique_addrs:
    # Keep the per-address list non-empty even when addr2line found nothing.
    frames = source_by_addr.get(addr) or [(None, None)]

    if addr in function_by_addr:
      symbol, offset = function_by_addr[addr]
      symbol_with_offset = FormatSymbolWithOffset(symbol, offset)
    else:
      symbol_with_offset = None

    result[addr] = [(source_symbol, source_location, symbol_with_offset)
                    for (source_symbol, source_location) in frames]

  return result
165
166
def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or SYMBOLS_DIR + lib does not exist. Otherwise a
    dictionary of the form {addr: [(symbol, file:line)]} where each address
    has a list of associated symbols and locations or an empty list if no
    symbol information was found.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.

    If addr2line stops producing output early, the dictionary only covers
    the addresses processed up to that point.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  # Text-mode pipes so the str reads and writes below behave the same on
  # Python 2 and Python 3.
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      at_eof = False
      while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not symbol_line or not location_line:
          # readline() returns "" only at EOF: addr2line has gone away.
          # Without this check the loop would append empty records forever.
          at_eof = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        if location == "??:0":
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          # Flush so the sentinel reaches addr2line even when the pipe is
          # buffered; otherwise the next readline() could block forever.
          child.stdin.flush()
          first = False
      result[addr] = records
      if at_eof:
        break
  finally:
    # Always release both pipes and reap the child, even on error.
    try:
      child.stdin.close()
    finally:
      child.stdout.close()
      child.wait()
  return result
221
222
def StripPC(addr):
  """Strips the Thumb bit from a program counter address when appropriate.

  Args:
    addr: the program counter address, as an integer

  Returns:
    addr with bit 0 cleared when ARCH is "arm"; otherwise addr unchanged.
  """
  return (addr & ~1) if ARCH == "arm" else addr
236
Elliott Hughesc3c86192014-08-29 13:49:57 -0700237
def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions
      for.

  Returns:
    None if lib is empty or SYMBOLS_DIR + lib does not exist. Otherwise a
    dictionary of the form {addr: (string symbol, offset)} holding an entry
    for each address that coincides with a disassembled instruction address
    (after StripPC); an empty dictionary if unique_addrs is empty.
  """
  if not lib:
    return None

  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    return None

  # Pair every address string with its numeric, StripPC'd value and sort
  # numerically. Sorting the hex strings themselves misorders addresses of
  # different lengths ("ff" sorts after "1000"), which breaks both the
  # start/stop range and the in-order scan below.
  targets = sorted((StripPC(int(addr, 16)), addr) for addr in unique_addrs)
  if not targets:
    return {}

  cmd = [ToolPath("objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + str(targets[0][0]),
         "--stop-address=" + str(targets[-1][0] + 8),
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:  b510        push  {r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None  # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  target_index = 0  # Index of the target we are currently looking for.

  # Text-mode pipe so the str regexps above work on Python 2 and Python 3.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                           universal_newlines=True)
  result = {}
  try:
    for line in child.stdout:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it a disassembly line like:
      #   177b2:  b510        push  {r4, lr}
      components = asm_regexp.match(line)
      if components:
        i_addr = int(components.group(1), 16)
        # Consume every target at or before this instruction. Targets equal
        # to it are recorded (several strings can strip to the same value);
        # targets already passed can never match a later line, so skip them
        # instead of letting them block the remaining targets.
        while (target_index < len(targets) and
               targets[target_index][0] <= i_addr):
          i_target, target_addr = targets[target_index]
          if i_target == i_addr:
            result[target_addr] = (current_symbol,
                                   i_target - current_symbol_addr)
          target_index += 1
        if target_index >= len(targets):
          break
  finally:
    # Close our end of the pipe and reap objdump, even on error/early exit.
    child.stdout.close()
    child.wait()

  return result
320
321
def CallCppFilt(mangled_symbol):
  """Run a symbol name through the toolchain's c++filt.

  Args:
    mangled_symbol: the symbol name to pass to c++filt, as a string

  Returns:
    The first line of c++filt's output with surrounding whitespace removed
    (an empty string if it printed nothing).
  """
  cmd = [ToolPath("c++filt")]
  # Text-mode pipes so str input/output works on Python 2 and Python 3.
  process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE, universal_newlines=True)
  # communicate() writes the input, closes stdin, drains stdout and reaps the
  # child, so no pipe or zombie process is left behind.
  output = process.communicate(mangled_symbol + "\n")[0]
  return output.split("\n", 1)[0].strip()
331
Elliott Hughesc3c86192014-08-29 13:49:57 -0700332
def FormatSymbolWithOffset(symbol, offset):
  """Format a symbol name together with an offset into it.

  Args:
    symbol: the symbol name, or None if it is unknown
    offset: integer offset from the start of the symbol

  Returns:
    None if symbol is None, symbol itself if offset is 0, otherwise
    "<symbol>+<offset>" with the offset written in decimal.
  """
  if symbol is None:
    # An unknown symbol stays None rather than becoming the text "None+N".
    return None
  if offset == 0:
    return symbol
  return "%s+%d" % (symbol, offset)
Elliott Hughesc3c86192014-08-29 13:49:57 -0700337
338
339
class FindToolchainTests(unittest.TestCase):
  """Checks that FindToolchain() succeeds for each ABI listed below."""

  def assert_toolchain_found(self, abi):
    """Set the module-level ARCH to abi and look up its toolchain."""
    global ARCH
    ARCH = abi
    FindToolchain()  # Will throw on failure.

  def test_toolchains_found(self):
    for abi in ("arm", "arm64", "mips", "x86", "x86_64"):
      self.assert_toolchain_found(abi)
352
353
# Run this file's unit tests when it is executed as a script.
if __name__ == '__main__':
  unittest.main()