blob: e36304c4cb9a7e9744d112df0780618d075e5f54 [file] [log] [blame]
#!/usr/bin/python
#
# Copyright 2006 Google Inc. All Rights Reserved.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""
9
10import os
11import re
12import subprocess
13
# Directory that the build-system and prebuilt tool paths in this module are
# resolved against.  Use .get() so that an unset variable falls back to the
# current directory; indexing os.environ directly raises KeyError when the
# variable is missing, which made the "." fallback unreachable in that case.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
17
def FindSymbolsDir():
    """Ask the build system for the value of TARGET_OUT_UNSTRIPPED.

    Runs make against build/core/config.mk with ANDROID_BUILD_TOP as the
    working directory and reads the dumped variable from its standard output.

    Returns:
      The dumped value joined onto ANDROID_BUILD_TOP with os.path.join (which
      returns the dumped value unchanged when it is an absolute path).
    """
    cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
           "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
           "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
    # cwd= runs make inside the build top without changing (and then having to
    # restore) this process's own working directory.  universal_newlines makes
    # the output text on both Python 2 and Python 3.
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                             cwd=ANDROID_BUILD_TOP, universal_newlines=True)
    # communicate() drains the pipe, closes it and reaps the child process.
    output = child.communicate()[0]
    return os.path.join(ANDROID_BUILD_TOP, output.strip())


# Resolved once at import time; the lookup functions prepend it to library
# paths to locate the files that carry symbols.
SYMBOLS_DIR = FindSymbolsDir()
31
def Uname():
    """Return the host tag used in prebuilt/<...> and out/host/<...> paths.

    Returns:
      "linux-x86" on Linux; "darwin-x86" or "darwin-ppc" on Darwin depending
      on the last field of os.uname(); otherwise the first field of
      os.uname() unchanged.
    """
    host = os.uname()
    kernel = host[0]
    if kernel == "Linux":
        return "linux-x86"
    if kernel != "Darwin":
        return kernel
    # Darwin: tell Intel hosts apart from everything else.
    if host[-1] in ("i386", "x86_64"):
        return "darwin-x86"
    return "darwin-ppc"
43
def ToolPath(tool, toolchain_info=None):
    """Build the full path of a prebuilt toolchain binary.

    Args:
      tool: tool name without the target prefix, e.g. "addr2line"
      toolchain_info: optional (label, target) pair; TOOLCHAIN_INFO is used
        when it is omitted or empty.

    Returns:
      ANDROID_BUILD_TOP/prebuilt/<Uname()>/toolchain/<label>/bin/<target>-<tool>
    """
    # "or" only falls through to the module default when no pair was supplied.
    label, target = toolchain_info or TOOLCHAIN_INFO
    binary = "-".join((target, tool))
    return os.path.join(ANDROID_BUILD_TOP, "prebuilt", Uname(), "toolchain",
                        label, "bin", binary)
51
def FindToolchain():
    """Pick the first known toolchain that is present on disk.

    Args:
      None

    Returns:
      A pair of strings containing toolchain label and target prefix.

    Raises:
      Exception: none of the known toolchains provides an addr2line binary.
    """
    # Candidates in order of preference, newest first.
    candidates = (
        ("arm-linux-androideabi-4.4.x", "arm-linux-androideabi"),
        ("arm-eabi-4.4.3", "arm-eabi"),
        ("arm-eabi-4.4.0", "arm-eabi"),
        ("arm-eabi-4.3.1", "arm-eabi"),
        ("arm-eabi-4.2.1", "arm-eabi"),
    )

    # A toolchain counts as available when its addr2line binary exists.
    for candidate in candidates:
        if os.path.exists(ToolPath("addr2line", candidate)):
            return candidate

    raise Exception("Could not find tool chain")


# Default (label, target) pair used by ToolPath() when none is supplied.
TOOLCHAIN_INFO = FindToolchain()
80
def SymbolInformation(lib, addr):
    """Look up symbol information for a single address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address

    Returns:
      A tuple (source_symbol, source_location, object_symbol_with_offset);
      each value may be None if the information was unavailable, and all
      three are None when the lookup produced nothing for addr.

      source_symbol may not be a prefix of object_symbol_with_offset if the
      source function was inlined in the object code of another function.

      Usually you want to display the object_symbol_with_offset and
      source_location; the source_symbol is only useful to show if the
      address was from an inlined function.
    """
    nothing = (None, None, None)

    lookup = SymbolInformationForSet(lib, set([addr]))
    if not lookup:
        return nothing

    entry = lookup.get(addr)
    if not entry:
        return nothing
    return entry
103
104
def SymbolInformationForSet(lib, unique_addrs):
    """Look up symbol information for several addresses in one library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal addresses

    Returns:
      A dictionary of the form
      {addr: (source_symbol, source_location, object_symbol_with_offset)},
      where individual values may be None if the information was
      unavailable. Returns None when lib is empty or when either the
      addr2line or the objdump lookup yields nothing.

      For a given address, source_symbol may not be a prefix of
      object_symbol_with_offset if the source function was inlined in the
      object code of another function.

      Usually you want to display the object_symbol_with_offset and
      source_location; the source_symbol is only useful to show if the
      address was from an inlined function.
    """
    if not lib:
        return None

    line_info = CallAddr2LineForSet(lib, unique_addrs)
    if not line_info:
        return None

    objdump_info = CallObjdumpForSet(lib, unique_addrs)
    if not objdump_info:
        return None

    combined = {}
    for addr in unique_addrs:
        source_symbol, source_location = line_info.get(addr, (None, None))
        symbol_with_offset = None
        if addr in objdump_info:
            # Stored value is an (object_symbol, object_offset) pair.
            symbol_with_offset = FormatSymbolWithOffset(*objdump_info[addr])
        combined[addr] = (source_symbol, source_location, symbol_with_offset)

    return combined
148
149
def CallAddr2LineForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Feeds the addresses one at a time to a single addr2line process and reads
    back two lines per address (function name, then file:line).

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to look up.

    Returns:
      A dictionary of the form {addr: (symbol, file:line)}. The values may
      be (None, None) if the address could not be looked up. Returns None
      when lib is empty or its symbol file does not exist.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    cmd = [ToolPath("addr2line"), "--functions", "--demangle", "--exe=" + symbols]
    # universal_newlines gives text-mode pipes, so the str writes and the str
    # comparisons below behave the same on Python 2 and Python 3.
    child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                             universal_newlines=True)

    result = {}
    try:
        for addr in sorted(unique_addrs):
            child.stdin.write("0x%s\n" % addr)
            # Flush so addr2line sees the request before we block on its reply.
            child.stdin.flush()
            symbol = child.stdout.readline().strip()
            if symbol == "??":
                symbol = None
            location = child.stdout.readline().strip()
            if location == "??:0":
                location = None
            result[addr] = (symbol, location)
    finally:
        # Always release the pipes and reap the child, even if a read or
        # write failed part-way through.
        child.stdin.close()
        child.stdout.close()
        child.wait()
    return result
188
189
def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of string hexadecimal addresses to find the functions
        for.

    Returns:
      A dictionary of the form {addr: (string symbol, offset)}, or None when
      lib is empty or its symbol file does not exist. Addresses that do not
      coincide with the start of a disassembled instruction are left out of
      the dictionary; an empty address set yields an empty dictionary.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    # Order by numeric value: plain string sorting places "10" before "9",
    # which would produce a wrong start/stop range and a wrong scan order.
    addrs = sorted(unique_addrs, key=lambda addr: int(addr, 16))
    if not addrs:
        # Nothing to look up; avoids indexing into an empty list below.
        return {}

    cmd = [ToolPath("objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=0x" + addrs[0],
           # Stop a little past the highest address so that its instruction
           # is part of the disassembly.
           "--stop-address=" + str(int(addrs[-1], 16) + 8),
           symbols]

    # universal_newlines yields text lines on both Python 2 and Python 3.
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
    try:
        return _ParseObjdumpOutput(child.stdout, addrs)
    finally:
        # Close our end first (parsing may have stopped early), then reap.
        child.stdout.close()
        child.wait()


def _ParseObjdumpOutput(lines, addrs):
    """Match addresses against objdump disassembly lines.

    Args:
      lines: iterable of text lines produced by objdump --disassemble.
      addrs: list of hexadecimal address strings in ascending numeric order.

    Returns:
      A dictionary {addr: (symbol, offset)} for every address that equals the
      address of a disassembled instruction, where symbol is the most recent
      function label seen (None if there was none) and offset is the distance
      from that function's start.
    """
    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an optional
    # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
    func_regexp = re.compile(r"^([a-f0-9]+) <(.*)>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:       b510            push    {r4, lr}
    asm_regexp = re.compile(r"^ *([a-f0-9]+):")

    targets = [int(addr, 16) for addr in addrs]
    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.
    addr_index = 0           # Index of the address we are currently looking for.

    result = {}
    for line in lines:
        components = func_regexp.match(line)
        if components:
            # This is a new function, so record it and its address.
            current_symbol_addr = int(components.group(1), 16)
            current_symbol = components.group(2)

            # Does it have an optional offset like: "foo(..)+0x2c"?
            components = offset_regexp.match(current_symbol)
            if components:
                current_symbol = components.group(1)
                offset = components.group(2)
                if offset:
                    # The label is mid-function; step back to its start.
                    current_symbol_addr -= int(offset, 16)
            continue

        components = asm_regexp.match(line)
        if not components:
            continue
        i_addr = int(components.group(1), 16)

        # Addresses in the disassembly only increase, so a target below the
        # current instruction can never match later.  Skip it instead of
        # waiting on it forever and losing every address after it.
        while addr_index < len(addrs) and targets[addr_index] < i_addr:
            addr_index += 1

        # Record every spelling of this address (e.g. "1a" and "001a").
        while addr_index < len(addrs) and targets[addr_index] == i_addr:
            result[addrs[addr_index]] = (current_symbol,
                                         i_addr - current_symbol_addr)
            addr_index += 1

        if addr_index >= len(addrs):
            break

    return result
272
273
def CallCppFilt(mangled_symbol):
    """Demangle a symbol name by piping it through the toolchain's c++filt.

    Args:
      mangled_symbol: string symbol name to demangle

    Returns:
      The first line of c++filt's output with surrounding whitespace
      stripped (an empty string if c++filt printed nothing).
    """
    cmd = [ToolPath("c++filt")]
    # universal_newlines gives text-mode pipes on both Python 2 and Python 3.
    process = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                               universal_newlines=True)
    # communicate() sends the input, closes stdin, drains stdout and reaps the
    # child, so no process or pipe is left behind.
    output = process.communicate(mangled_symbol + "\n")[0]
    return output.partition("\n")[0].strip()
283
def FormatSymbolWithOffset(symbol, offset):
    """Render a symbol and an offset into it as "symbol+offset".

    Args:
      symbol: string symbol name, or None if the symbol is unknown
      offset: integer offset from the start of the symbol

    Returns:
      None when symbol is None (rather than the misleading text "None+N"),
      the bare symbol when offset is 0, otherwise "symbol+offset" with the
      offset in decimal.
    """
    if symbol is None:
        return None
    if offset == 0:
        return symbol
    return "%s+%d" % (symbol, offset)
287 return "%s+%d" % (symbol, offset)