Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # |
Ben Cheng | b42dad0 | 2013-04-25 15:14:04 -0700 | [diff] [blame] | 3 | # Copyright (C) 2013 The Android Open Source Project |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 16 | |
| 17 | """Module for looking up symbolic debugging information. |
| 18 | |
| 19 | The information can include symbol names, offsets, and source locations. |
| 20 | """ |
| 21 | |
Elliott Hughes | 0836593 | 2014-06-13 18:12:25 -0700 | [diff] [blame] | 22 | import glob |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 23 | import os |
Yang Ni | e4b2a1a | 2014-11-06 17:42:33 -0800 | [diff] [blame] | 24 | import platform |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 25 | import re |
| 26 | import subprocess |
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 27 | import unittest |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 28 | |
# Root of the Android source tree. Falls back to the current directory when
# the environment variable is unset or empty; a plain os.environ[...] lookup
# would raise KeyError for the unset case before the fallback could apply.
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."
| 32 | |
def FindSymbolsDir():
    """Return the directory that holds the unstripped target binaries.

    Runs make's dumpvar target from ANDROID_BUILD_TOP to ask the build
    system for the absolute value of TARGET_OUT_UNSTRIPPED.

    Returns:
      The stripped output of the make invocation joined onto
      ANDROID_BUILD_TOP (os.path.join keeps an absolute value unchanged).

    Raises:
      OSError: if ANDROID_BUILD_TOP cannot be used as a working directory.
    """
    cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
           "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
           "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
    # cwd= runs make inside the tree without touching this process's own
    # working directory, so there is nothing to restore afterwards.
    child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                             cwd=ANDROID_BUILD_TOP)
    # communicate() drains and closes the pipe and reaps the child, so no
    # zombie process or open file descriptor is left behind.
    output = child.communicate()[0]
    return os.path.join(ANDROID_BUILD_TOP, output.strip())
| 44 | |
# Directory containing unstripped binaries; computed once at import time by
# asking the build system (see FindSymbolsDir).
SYMBOLS_DIR = FindSymbolsDir()

# Target architecture. FindToolchain() and StripPC() read this at call time,
# so it may be reassigned before symbolizing.
ARCH = "arm"

# These are private. Do not access them from other modules.
# They memoize the result of FindToolchain() together with the ARCH value it
# was computed for.
_CACHED_TOOLCHAIN = _CACHED_TOOLCHAIN_ARCH = None
| 53 | |
Ben Cheng | b42dad0 | 2013-04-25 15:14:04 -0700 | [diff] [blame] | 54 | |
def ToolPath(tool, toolchain=None):
    """Return a fully-qualified path to the specified tool.

    Args:
      tool: unprefixed tool name, e.g. "addr2line" or "objdump".
      toolchain: directory holding the toolchain binaries; when omitted (or
        otherwise falsy) the directory returned by FindToolchain() is used.

    Returns:
      The first path in that directory matching "*-<tool>".

    Raises:
      IndexError: if no file in the directory matches.
    """
    bin_dir = toolchain or FindToolchain()
    candidates = glob.glob(os.path.join(bin_dir, "*-" + tool))
    return candidates[0]
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 60 | |
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 61 | |
def FindToolchain():
    """Return the toolchain bin directory matching ARCH.

    The result is cached together with the ARCH it was computed for, so the
    filesystem is only searched again after ARCH changes.

    Returns:
      Path of the chosen prebuilt GCC "bin/" directory.

    Raises:
      Exception: if no toolchain directory exists for ARCH, or the chosen
        toolchain does not contain an addr2line binary.
    """
    global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
    if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
        return _CACHED_TOOLCHAIN

    # We use slightly different names from GCC, and there's only one toolchain
    # for x86/x86_64. Note that these are the names of the top-level directory
    # rather than the _different_ names used lower down the directory
    # hierarchy!
    gcc_dir_overrides = {"arm64": "aarch64", "mips64": "mips", "x86_64": "x86"}
    gcc_dir = gcc_dir_overrides.get(ARCH, ARCH)

    os_name = platform.system().lower()

    available_toolchains = glob.glob(
        "%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/"
        % (ANDROID_BUILD_TOP, os_name, gcc_dir))
    if not available_toolchains:
        raise Exception("Could not find tool chain for %s" % (ARCH))

    # Take the lexicographically last match (presumably the newest version).
    toolchain = sorted(available_toolchains)[-1]

    # ToolPath() indexes into a glob result and raises IndexError when nothing
    # matches; translate that into the intended, descriptive error instead of
    # letting the bare IndexError escape.
    try:
        addr2line = ToolPath("addr2line", toolchain)
    except IndexError:
        addr2line = None
    if not addr2line or not os.path.exists(addr2line):
        raise Exception("No addr2line for %s" % (toolchain))

    _CACHED_TOOLCHAIN = toolchain
    _CACHED_TOOLCHAIN_ARCH = ARCH
    # Single-argument parenthesised form prints identically under Python 2.
    print("Using %s toolchain from: %s"
          % (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN))
    return _CACHED_TOOLCHAIN
| 94 | |
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 95 | |
def SymbolInformation(lib, addr):
    """Look up symbol information about a single address.

    Args:
      lib: library (or executable) pathname containing symbols
      addr: string hexadecimal address

    Returns:
      A list of the form [(source_symbol, source_location,
      object_symbol_with_offset)].

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.  The list is
      always non-empty, even if no information is available.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    by_addr = SymbolInformationForSet(lib, {addr})
    if by_addr:
        frames = by_addr.get(addr)
        if frames:
            return frames
    # Nothing known: hand back a single all-None placeholder record.
    return [(None, None, None)]
Iliyan Malchev | 4929d6a | 2011-08-04 17:44:40 -0700 | [diff] [blame] | 117 | |
| 118 | |
def SymbolInformationForSet(lib, unique_addrs):
    """Look up symbol information for a set of addresses from one library.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal address strings

    Returns:
      None when lib is empty or either the addr2line or the objdump lookup
      yields nothing; otherwise a dictionary of the form
      {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
      where each address has a non-empty list of records.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.

      Usually you want to display the source_location and
      object_symbol_with_offset from the last element in the list.
    """
    if not lib:
        return None

    line_info = CallAddr2LineForSet(lib, unique_addrs)
    if not line_info:
        return None

    # NOTE(review): an empty (not just missing) objdump result also returns
    # None here, which drops any addr2line data - confirm this is intended.
    objdump_info = CallObjdumpForSet(lib, unique_addrs)
    if not objdump_info:
        return None

    combined = {}
    for addr in unique_addrs:
        # Substitute one blank source record so the list is never empty.
        frames = line_info.get(addr) or [(None, None)]
        if addr in objdump_info:
            name, offset = objdump_info.get(addr)
            decorated = FormatSymbolWithOffset(name, offset)
        else:
            decorated = None
        # Every (possibly inlined) frame shares the same object-level symbol.
        combined[addr] = [(sym, loc, decorated) for (sym, loc) in frames]

    return combined
| 165 | |
| 166 | |
def _QueryAddr2Line(child, addr):
    """Ask a running addr2line about one address.

    Returns (records, child_alive): records is a list of (symbol, location)
    tuples, child_alive is False once the child hit EOF or its pipe broke.
    """
    records = []
    try:
        child.stdin.write("0x%s\n" % addr)
        child.stdin.flush()
        sentinel_sent = False
        while True:
            symbol_line = child.stdout.readline()
            location_line = child.stdout.readline()
            # readline() returns "" only at EOF.  Without this check a dead
            # addr2line would make the loop append empty records forever.
            if not symbol_line or not location_line:
                return records, False
            symbol = symbol_line.strip()
            if symbol == "??":
                symbol = None
            location = location_line.strip()
            # Unknown locations are printed as "??:0" or "??:?" depending on
            # the binutils version; missing the latter would mean never
            # seeing the sentinel and blocking on the next read.
            if location in ("??:0", "??:?"):
                location = None
            if symbol is None and location is None:
                return records, True
            records.append((symbol, location))
            if not sentinel_sent:
                # Write a blank line as a sentinel so we know when to stop
                # reading inlines from the output.
                # The blank line will cause addr2line to emit an
                # unknown-symbol / unknown-location pair.
                child.stdin.write("\n")
                child.stdin.flush()
                sentinel_sent = True
    except IOError:
        # Broken pipe: the child went away while we were writing to it.
        return records, False


def CallAddr2LineForSet(lib, unique_addrs):
    """Look up line and symbol information for a set of addresses.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal address strings to look up.

    Returns:
      None if lib is empty or has no file under SYMBOLS_DIR; otherwise a
      dictionary of the form {addr: [(symbol, file:line)]} where
      each address has a list of associated symbols and locations
      or an empty list if no symbol information was found.

      If the function has been inlined then the list may contain
      more than one element with the symbols for the most deeply
      nested inlined location appearing first.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    cmd = [ToolPath("addr2line"), "--functions", "--inlines",
           "--demangle", "--exe=" + symbols]
    child = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE)

    result = {}
    child_alive = True
    try:
        for addr in sorted(unique_addrs):
            records = []
            # Once the child is gone, remaining addresses get empty lists
            # instead of writing to a closed pipe.
            if child_alive:
                records, child_alive = _QueryAddr2Line(child, addr)
            result[addr] = records
    finally:
        child.stdin.close()
        child.stdout.close()
        # Reap the child so it does not linger as a zombie.
        child.wait()
    return result
| 221 | |
| 222 | |
def StripPC(addr):
    """Strip the Thumb bit from a program counter address when appropriate.

    Args:
      addr: the program counter address (an integer)

    Returns:
      addr with bit 0 cleared when ARCH is "arm"; addr unchanged otherwise.
    """
    return (addr & ~1) if ARCH == "arm" else addr
| 236 | |
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 237 | |
def CallObjdumpForSet(lib, unique_addrs):
    """Use objdump to find out the names of the containing functions.

    Args:
      lib: library (or executable) pathname containing symbols
      unique_addrs: set of hexadecimal address strings to find the
        functions for.

    Returns:
      None if lib is empty or has no file under SYMBOLS_DIR; otherwise a
      dictionary of the form {addr: (string symbol, offset)} holding an entry
      for every requested address that coincides with a disassembled
      instruction address.
    """
    if not lib:
        return None

    symbols = SYMBOLS_DIR + lib
    if not os.path.exists(symbols):
        return None

    # Sort numerically: a plain string sort puts "10000" before "9fff", which
    # would produce an inverted objdump range and break the in-order matching
    # against the disassembly below.
    addrs = sorted(unique_addrs, key=lambda text: int(text, 16))
    if not addrs:
        return {}

    start_addr_dec = str(StripPC(int(addrs[0], 16)))
    stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
    cmd = [ToolPath("objdump"),
           "--section=.text",
           "--demangle",
           "--disassemble",
           "--start-address=" + start_addr_dec,
           "--stop-address=" + stop_addr_dec,
           symbols]

    # Function lines look like:
    #   000177b0 <android::IBinder::~IBinder()+0x2c>:
    # We pull out the address and function first. Then we check for an
    # optional offset. This is tricky due to functions that look like
    # "operator+(..)+0x2c"
    func_regexp = re.compile(r"(^[a-f0-9]*) \<(.*)\>:$")
    offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

    # A disassembly line looks like:
    #   177b2:  b510        push  {r4, lr}
    asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

    current_symbol = None    # The current function symbol in the disassembly.
    current_symbol_addr = 0  # The address of the current function.
    addr_index = 0           # Index of the address we are looking for.

    child = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    result = {}
    try:
        for line in child.stdout:
            # Is it a function line like:
            #   000177b0 <android::IBinder::~IBinder()>:
            components = func_regexp.match(line)
            if components:
                # This is a new function, so record the current function and
                # its address.
                current_symbol_addr = int(components.group(1), 16)
                current_symbol = components.group(2)

                # Does it have an optional offset like: "foo(..)+0x2c"?
                components = offset_regexp.match(current_symbol)
                if components:
                    current_symbol = components.group(1)
                    offset = components.group(2)
                    if offset:
                        current_symbol_addr -= int(offset, 16)

            # Is it a disassembly line like:
            #   177b2:  b510        push  {r4, lr}
            components = asm_regexp.match(line)
            if not components:
                continue
            addr_text = components.group(1).strip()
            if not addr_text:
                continue
            i_addr = int(addr_text, 16)

            # Consume every pending target at or below this instruction.
            # Targets that fall between instructions are skipped rather than
            # stalling the scan, and several targets that strip to the same
            # address are all recorded.
            while addr_index < len(addrs):
                target_addr = addrs[addr_index]
                i_target = StripPC(int(target_addr, 16))
                if i_target > i_addr:
                    break
                if i_target == i_addr:
                    result[target_addr] = (current_symbol,
                                           i_target - current_symbol_addr)
                addr_index += 1
            if addr_index >= len(addrs):
                break
    finally:
        child.stdout.close()
        # Reap the child so it does not linger as a zombie.
        child.wait()

    return result
| 320 | |
| 321 | |
def CallCppFilt(mangled_symbol):
    """Demangle a C++ symbol by piping it through the toolchain's c++filt.

    Args:
      mangled_symbol: the mangled symbol name, as a string.

    Returns:
      The first line c++filt prints, with surrounding whitespace stripped.
    """
    cmd = [ToolPath("c++filt")]
    process = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE)
    # communicate() feeds the input, closes both pipes and waits for the
    # child, so no zombie process is left behind.
    output = process.communicate(mangled_symbol + "\n")[0]
    return output.split("\n", 1)[0].strip()
| 331 | |
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 332 | |
def FormatSymbolWithOffset(symbol, offset):
    """Render a symbol and its byte offset as "symbol+offset".

    Args:
      symbol: the symbol name.
      offset: integer offset from the start of the symbol.

    Returns:
      symbol itself when offset is zero, otherwise "symbol+offset" with the
      offset written in decimal.
    """
    if offset != 0:
        return "%s+%d" % (symbol, offset)
    return symbol
Elliott Hughes | c3c8619 | 2014-08-29 13:49:57 -0700 | [diff] [blame] | 337 | |
| 338 | |
| 339 | |
class FindToolchainTests(unittest.TestCase):
    """Checks that a toolchain can be located for each supported ABI."""

    def assert_toolchain_found(self, abi):
        """Point the module-level ARCH at abi and look its toolchain up."""
        global ARCH
        ARCH = abi
        FindToolchain()  # Will throw on failure.

    def test_toolchains_found(self):
        for abi in ("arm", "arm64", "mips", "x86", "x86_64"):
            self.assert_toolchain_found(abi)
| 352 | |
| 353 | |
# Run the self-tests when this module is executed as a script.
if "__main__" == __name__:
    unittest.main()