blob: cb0229b358cb75f29328697386a4936e9814bbc8 [file] [log] [blame]
Krzysztof Kosińskib1361112021-03-11 18:05:01 -08001#!/usr/bin/env python3
Ben Chengb42dad02013-04-25 15:14:04 -07002#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""stack symbolizes native crash dumps."""
18
David Srbecky3a3349e2021-10-28 13:08:10 +010019import collections
20import functools
Christopher Ferrisece64c42015-08-20 20:09:09 -070021import os
David Srbecky3a3349e2021-10-28 13:08:10 +010022import pathlib
Ben Chengb42dad02013-04-25 15:14:04 -070023import re
Christopher Ferrisece64c42015-08-20 20:09:09 -070024import subprocess
Ben Chengb42dad02013-04-25 15:14:04 -070025import symbol
Christopher Ferrisece64c42015-08-20 20:09:09 -070026import tempfile
Elliott Hughesa9e34172014-07-01 14:56:22 -070027import unittest
Ben Chengb42dad02013-04-25 15:14:04 -070028
Elliott Hughesc3166be2014-07-07 15:06:28 -070029import example_crashes
30
def ConvertTrace(lines):
  """Symbolize the crash dump in *lines* and print the result to stdout.

  Args:
    lines: iterable of text lines from a crash dump; they are handed
      unchanged to TraceConverter.ConvertTrace.
  """
  converter = TraceConverter()
  print("Reading symbols from", symbol.SYMBOLS_DIR)
  converter.ConvertTrace(lines)
35
36class TraceConverter:
Krzysztof Kosińskib1361112021-03-11 18:05:01 -080037 process_info_line = re.compile(r"(pid: [0-9]+, tid: [0-9]+.*)")
38 revision_line = re.compile(r"(Revision: '(.*)')")
39 signal_line = re.compile(r"(signal [0-9]+ \(.*\).*)")
40 abort_message_line = re.compile(r"(Abort message: '.*')")
41 thread_line = re.compile(r"(.*)(--- ){15}---")
Ben Chengb42dad02013-04-25 15:14:04 -070042 dalvik_jni_thread_line = re.compile("(\".*\" prio=[0-9]+ tid=[0-9]+ NATIVE.*)")
43 dalvik_native_thread_line = re.compile("(\".*\" sysTid=[0-9]+ nice=[0-9]+.*)")
Brigid Smithea0a8352014-06-30 16:01:40 -070044 register_line = re.compile("$a")
45 trace_line = re.compile("$a")
Andreas Gamped900d082015-08-21 15:25:03 -070046 sanitizer_trace_line = re.compile("$a")
Brigid Smithea0a8352014-06-30 16:01:40 -070047 value_line = re.compile("$a")
48 code_line = re.compile("$a")
Krzysztof Kosińskib1361112021-03-11 18:05:01 -080049 zipinfo_central_directory_line = re.compile(r"Central\s+directory\s+entry")
Christopher Ferrisabe22f42016-03-16 12:17:59 -070050 zipinfo_central_info_match = re.compile(
Krzysztof Kosińskib1361112021-03-11 18:05:01 -080051 r"^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)"
52 r".*^\s*compressed size:\s+(\d+)", re.M | re.S)
53 unreachable_line = re.compile(r"((\d+ bytes in \d+ unreachable allocations)|"
54 r"(\d+ bytes unreachable at [0-9a-f]+)|"
55 r"(referencing \d+ unreachable bytes in \d+ allocation(s)?)|"
56 r"(and \d+ similar unreachable bytes in \d+ allocation(s)?))")
Ben Chengb42dad02013-04-25 15:14:04 -070057 trace_lines = []
58 value_lines = []
59 last_frame = -1
Brigid Smithea0a8352014-06-30 16:01:40 -070060 width = "{8}"
Elliott Hughesc3c86192014-08-29 13:49:57 -070061 spacing = ""
Christopher Ferrisece64c42015-08-20 20:09:09 -070062 apk_info = dict()
Christopher Ferris29fcc842022-11-02 17:33:30 -070063 lib_to_path = dict()
Ben Chengb42dad02013-04-25 15:14:04 -070064
Elliott Hughesa9e34172014-07-01 14:56:22 -070065 register_names = {
66 "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr",
Elliott Hughesbe4de462014-07-14 17:15:41 -070067 "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate",
Elliott Hughesa9e34172014-07-01 14:56:22 -070068 "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
69 "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags",
70 "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags",
Prashanth Swaminathan01fd4182023-03-08 13:16:35 -080071 "riscv64": "ra|sp|gp|tp|t0|t1|t2|s0|s1|a0|a1|a2|a3|a4|a5|a6|a7|s2|s3|s4|s5|s6|s7|s8|s9|s10|s11|t3|t4|t5|t6|pc",
Elliott Hughesa9e34172014-07-01 14:56:22 -070072 }
73
  # We use the llvm-readelf tool to extract the bitness and BuildId from ELF files.
75 ElfInfo = collections.namedtuple("ElfInfo", ["bitness", "build_id"])
Christopher Ferris29fcc842022-11-02 17:33:30 -070076 readelf_output = re.compile(r"Class:\s*ELF(?P<bitness>32|64).*"
77 r"Build ID:\s*(?P<build_id>[0-9a-f]+)",
78 flags=re.DOTALL)
David Srbecky3a3349e2021-10-28 13:08:10 +010079
Elliott Hughesa9e34172014-07-01 14:56:22 -070080 def UpdateAbiRegexes(self):
Prashanth Swaminathan01fd4182023-03-08 13:16:35 -080081 if symbol.ARCH == "arm64" or symbol.ARCH == "x86_64" or symbol.ARCH == "riscv64":
Brigid Smithea0a8352014-06-30 16:01:40 -070082 self.width = "{16}"
Elliott Hughesc3c86192014-08-29 13:49:57 -070083 self.spacing = " "
Brigid Smith15142f72014-07-15 13:47:07 -070084 else:
85 self.width = "{8}"
Elliott Hughesc3c86192014-08-29 13:49:57 -070086 self.spacing = ""
Brigid Smithea0a8352014-06-30 16:01:40 -070087
Prashanth Swaminathan01fd4182023-03-08 13:16:35 -080088 self.register_line = re.compile("(([ ]*\\b(" + self.register_names[symbol.ARCH] + ")\\b +[0-9a-f]" + self.width + "){1,5}$)")
Brigid Smithea0a8352014-06-30 16:01:40 -070089
90 # Note that both trace and value line matching allow for variable amounts of
91 # whitespace (e.g. \t). This is because the we want to allow for the stack
92 # tool to operate on AndroidFeedback provided system logs. AndroidFeedback
93 # strips out double spaces that are found in tombsone files and logcat output.
94 #
95 # Examples of matched trace lines include lines from tombstone files like:
96 # #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
97 #
98 # Or lines from AndroidFeedback crash report system logs like:
99 # 03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
100 # Please note the spacing differences.
Andreas Gamped900d082015-08-21 15:25:03 -0700101 self.trace_line = re.compile(
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800102 r".*" # Random start stuff.
103 r"\#(?P<frame>[0-9]+)" # Frame number.
104 r"[ \t]+..[ \t]+" # (space)pc(space).
105 r"(?P<offset>[0-9a-f]" + self.width + ")[ \t]+" # Offset (hex number given without
106 # 0x prefix).
107 r"(?P<dso>\[[^\]]+\]|[^\r\n \t]*)" # Library name.
108 r"( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?" # Offset into the file to find the start of the shared so.
David Srbecky3a3349e2021-10-28 13:08:10 +0100109 r"(?P<symbolpresent> \((?P<symbol>.*?)\))?" # Is the symbol there? (non-greedy)
110 r"( \(BuildId: (?P<build_id>.*)\))?" # Optional build-id of the ELF file.
111 r"[ \t]*$") # End of line (to expand non-greedy match).
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800112 # pylint: disable-msg=C6310
Andreas Gamped900d082015-08-21 15:25:03 -0700113 # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as
114 # its own regex. Example:
115 # 08-19 05:29:26.283 397 403 I : #0 0xb6a15237 (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
116 self.sanitizer_trace_line = re.compile(
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800117 r".*" # Random start stuff.
118 r"\#(?P<frame>[0-9]+)" # Frame number.
119 r"[ \t]+0x[0-9a-f]+[ \t]+" # PC, not interesting to us.
120 r"\(" # Opening paren.
121 r"(?P<dso>[^+]+)" # Library name.
122 r"\+" # '+'
123 r"0x(?P<offset>[0-9a-f]+)" # Offset (hex number given with
124 # 0x prefix).
125 r"\)") # Closing paren.
126 # pylint: disable-msg=C6310
Brigid Smithea0a8352014-06-30 16:01:40 -0700127 # Examples of matched value lines include:
128 # bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
129 # bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so (symbol)
130 # 03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
131 # Again, note the spacing differences.
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800132 self.value_line = re.compile(r"(.*)([0-9a-f]" + self.width + r")[ \t]+([0-9a-f]" + self.width + r")[ \t]+([^\r\n \t]*)( \((.*)\))?")
Brigid Smithea0a8352014-06-30 16:01:40 -0700133 # Lines from 'code around' sections of the output will be matched before
134 # value lines because otheriwse the 'code around' sections will be confused as
135 # value lines.
136 #
137 # Examples include:
138 # 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
139 # 03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800140 self.code_line = re.compile(r"(.*)[ \t]*[a-f0-9]" + self.width +
141 r"[ \t]*[a-f0-9]" + self.width +
142 r"[ \t]*[a-f0-9]" + self.width +
143 r"[ \t]*[a-f0-9]" + self.width +
144 r"[ \t]*[a-f0-9]" + self.width +
145 r"[ \t]*[ \r\n]") # pylint: disable-msg=C6310
Brigid Smithea0a8352014-06-30 16:01:40 -0700146
147 def CleanLine(self, ln):
148 # AndroidFeedback adds zero width spaces into its crash reports. These
149 # should be removed or the regular expresssions will fail to match.
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800150 return ln.encode().decode(encoding='utf8', errors='ignore')
Brigid Smithea0a8352014-06-30 16:01:40 -0700151
152 def PrintTraceLines(self, trace_lines):
153 """Print back trace."""
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800154 maxlen = max(len(tl[1]) for tl in trace_lines)
155 print("\nStack Trace:")
156 print(" RELADDR " + self.spacing + "FUNCTION".ljust(maxlen) + " FILE:LINE")
Brigid Smithea0a8352014-06-30 16:01:40 -0700157 for tl in self.trace_lines:
158 (addr, symbol_with_offset, location) = tl
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800159 print(" %8s %s %s" % (addr, symbol_with_offset.ljust(maxlen), location))
Brigid Smithea0a8352014-06-30 16:01:40 -0700160
161 def PrintValueLines(self, value_lines):
162 """Print stack data values."""
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800163 maxlen = max(len(tl[2]) for tl in self.value_lines)
164 print("\nStack Data:")
165 print(" ADDR " + self.spacing + "VALUE " + "FUNCTION".ljust(maxlen) + " FILE:LINE")
Brigid Smithea0a8352014-06-30 16:01:40 -0700166 for vl in self.value_lines:
167 (addr, value, symbol_with_offset, location) = vl
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800168 print(" %8s %8s %s %s" % (addr, value, symbol_with_offset.ljust(maxlen), location))
Brigid Smithea0a8352014-06-30 16:01:40 -0700169
170 def PrintOutput(self, trace_lines, value_lines):
171 if self.trace_lines:
172 self.PrintTraceLines(self.trace_lines)
173 if self.value_lines:
174 self.PrintValueLines(self.value_lines)
175
176 def PrintDivider(self):
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800177 print("\n-----------------------------------------------------\n")
Brigid Smithea0a8352014-06-30 16:01:40 -0700178
Christopher Ferrisece64c42015-08-20 20:09:09 -0700179 def DeleteApkTmpFiles(self):
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700180 for _, _, tmp_files in self.apk_info.values():
181 for tmp_file in tmp_files.values():
182 os.unlink(tmp_file)
Christopher Ferrisece64c42015-08-20 20:09:09 -0700183
Brigid Smithea0a8352014-06-30 16:01:40 -0700184 def ConvertTrace(self, lines):
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800185 lines = [self.CleanLine(line) for line in lines]
Christopher Ferrisece64c42015-08-20 20:09:09 -0700186 try:
Christopher Ferrisbf8a9402016-03-11 15:50:46 -0800187 if not symbol.ARCH:
188 symbol.SetAbi(lines)
189 self.UpdateAbiRegexes()
Christopher Ferrisece64c42015-08-20 20:09:09 -0700190 for line in lines:
191 self.ProcessLine(line)
192 self.PrintOutput(self.trace_lines, self.value_lines)
193 finally:
194 # Delete any temporary files created while processing the lines.
195 self.DeleteApkTmpFiles()
Brigid Smithea0a8352014-06-30 16:01:40 -0700196
Andreas Gamped900d082015-08-21 15:25:03 -0700197 def MatchTraceLine(self, line):
David Srbecky3a3349e2021-10-28 13:08:10 +0100198 match = self.trace_line.match(line)
199 if match:
Andreas Gamped900d082015-08-21 15:25:03 -0700200 return {"frame": match.group("frame"),
201 "offset": match.group("offset"),
Christopher Ferrisece64c42015-08-20 20:09:09 -0700202 "so_offset": match.group("so_offset"),
Andreas Gamped900d082015-08-21 15:25:03 -0700203 "dso": match.group("dso"),
204 "symbol_present": bool(match.group("symbolpresent")),
David Srbecky3a3349e2021-10-28 13:08:10 +0100205 "symbol_name": match.group("symbol"),
206 "build_id": match.group("build_id")}
207 match = self.sanitizer_trace_line.match(line)
208 if match:
Andreas Gamped900d082015-08-21 15:25:03 -0700209 return {"frame": match.group("frame"),
210 "offset": match.group("offset"),
Andreas Gampe57acd5f2015-09-17 11:44:21 -0700211 "so_offset": None,
Andreas Gamped900d082015-08-21 15:25:03 -0700212 "dso": match.group("dso"),
213 "symbol_present": False,
David Srbecky3a3349e2021-10-28 13:08:10 +0100214 "symbol_name": None,
215 "build_id": None}
Andreas Gamped900d082015-08-21 15:25:03 -0700216 return None
217
Christopher Ferrisece64c42015-08-20 20:09:09 -0700218 def ExtractLibFromApk(self, apk, shared_lib_name):
219 # Create a temporary file containing the shared library from the apk.
220 tmp_file = None
221 try:
222 tmp_fd, tmp_file = tempfile.mkstemp()
223 if subprocess.call(["unzip", "-p", apk, shared_lib_name], stdout=tmp_fd) == 0:
224 os.close(tmp_fd)
225 shared_file = tmp_file
226 tmp_file = None
227 return shared_file
228 finally:
229 if tmp_file:
230 os.close(tmp_fd)
231 os.unlink(tmp_file)
232 return None
233
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700234 def ProcessCentralInfo(self, offset_list, central_info):
235 match = self.zipinfo_central_info_match.search(central_info)
236 if not match:
237 raise Exception("Cannot find all info from zipinfo\n" + central_info)
238 name = match.group(1)
239 start = int(match.group(2))
240 end = start + int(match.group(3))
241
242 offset_list.append([name, start, end])
243 return name, start, end
244
Christopher Ferrisece64c42015-08-20 20:09:09 -0700245 def GetLibFromApk(self, apk, offset):
246 # Convert the string to hex.
247 offset = int(offset, 16)
248
249 # Check if we already have information about this offset.
250 if apk in self.apk_info:
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700251 apk_full_path, offset_list, tmp_files = self.apk_info[apk]
252 for file_name, start, end in offset_list:
253 if offset >= start and offset < end:
254 if file_name in tmp_files:
255 return file_name, tmp_files[file_name]
Christopher Ferrisece64c42015-08-20 20:09:09 -0700256 tmp_file = self.ExtractLibFromApk(apk_full_path, file_name)
257 if tmp_file:
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700258 tmp_files[file_name] = tmp_file
Christopher Ferrisece64c42015-08-20 20:09:09 -0700259 return file_name, tmp_file
260 break
261 return None, None
262
263 if not "ANDROID_PRODUCT_OUT" in os.environ:
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800264 print("ANDROID_PRODUCT_OUT environment variable not set.")
Christopher Ferrisece64c42015-08-20 20:09:09 -0700265 return None, None
266 out_dir = os.environ["ANDROID_PRODUCT_OUT"]
267 if not os.path.exists(out_dir):
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800268 print("ANDROID_PRODUCT_OUT", out_dir, "does not exist.")
Christopher Ferrisece64c42015-08-20 20:09:09 -0700269 return None, None
270 if apk.startswith("/"):
271 apk_full_path = out_dir + apk
272 else:
273 apk_full_path = os.path.join(out_dir, apk)
274 if not os.path.exists(apk_full_path):
Krzysztof Kosińskib1361112021-03-11 18:05:01 -0800275 print("Cannot find apk", apk)
Christopher Ferrisece64c42015-08-20 20:09:09 -0700276 return None, None
277
Christopher Ferris7ea56f02021-10-21 00:20:18 +0000278 cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path], stdout=subprocess.PIPE,
279 encoding='utf8')
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700280 # Find the first central info marker.
281 for line in cmd.stdout:
282 if self.zipinfo_central_directory_line.search(line):
283 break
284
285 central_info = ""
286 file_name = None
Christopher Ferrisece64c42015-08-20 20:09:09 -0700287 offset_list = []
288 for line in cmd.stdout:
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700289 match = self.zipinfo_central_directory_line.search(line)
Christopher Ferrisece64c42015-08-20 20:09:09 -0700290 if match:
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700291 cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
292 if not file_name and offset >= start and offset < end:
293 file_name = cur_name
294 central_info = ""
295 else:
296 central_info += line
297 if central_info:
298 cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
299 if not file_name and offset >= start and offset < end:
300 file_name = cur_name
Christopher Ferrisece64c42015-08-20 20:09:09 -0700301
Christopher Ferris7ea56f02021-10-21 00:20:18 +0000302 # Make sure the offset_list is sorted, the zip file does not guarantee
303 # that the entries are in order.
304 offset_list = sorted(offset_list, key=lambda entry: entry[1])
305
Christopher Ferrisece64c42015-08-20 20:09:09 -0700306 # Save the information from the zip.
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700307 tmp_files = dict()
308 self.apk_info[apk] = [apk_full_path, offset_list, tmp_files]
309 if not file_name:
Christopher Ferrisece64c42015-08-20 20:09:09 -0700310 return None, None
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700311 tmp_shared_lib = self.ExtractLibFromApk(apk_full_path, file_name)
Christopher Ferrisece64c42015-08-20 20:09:09 -0700312 if tmp_shared_lib:
Christopher Ferrisabe22f42016-03-16 12:17:59 -0700313 tmp_files[file_name] = tmp_shared_lib
314 return file_name, tmp_shared_lib
Christopher Ferrisece64c42015-08-20 20:09:09 -0700315 return None, None
316
David Srbecky3a3349e2021-10-28 13:08:10 +0100317 # Find all files in the symbols directory and group them by basename (without directory).
David Srbeckydf6d4822021-10-29 11:35:18 +0100318 @functools.lru_cache(maxsize=None)
David Srbecky3a3349e2021-10-28 13:08:10 +0100319 def GlobSymbolsDir(self, symbols_dir):
320 files_by_basename = {}
321 for path in sorted(pathlib.Path(symbols_dir).glob("**/*")):
Christopher Ferrise6e02b02022-11-08 17:47:37 -0800322 if os.path.isfile(path):
323 files_by_basename.setdefault(path.name, []).append(path)
David Srbecky3a3349e2021-10-28 13:08:10 +0100324 return files_by_basename
325
326 # Use the "file" command line tool to find the bitness and build_id of given ELF file.
David Srbeckydf6d4822021-10-29 11:35:18 +0100327 @functools.lru_cache(maxsize=None)
David Srbecky3a3349e2021-10-28 13:08:10 +0100328 def GetLibraryInfo(self, lib):
Christopher Ferris29fcc842022-11-02 17:33:30 -0700329 stdout = subprocess.check_output([symbol.ToolPath("llvm-readelf"), "-h", "-n", lib], text=True)
330 match = self.readelf_output.search(stdout)
David Srbecky3a3349e2021-10-28 13:08:10 +0100331 if match:
332 return self.ElfInfo(bitness=match.group("bitness"), build_id=match.group("build_id"))
333 return None
334
335 # Search for a library with the given basename and build_id anywhere in the symbols directory.
David Srbeckydf6d4822021-10-29 11:35:18 +0100336 @functools.lru_cache(maxsize=None)
David Srbecky3a3349e2021-10-28 13:08:10 +0100337 def GetLibraryByBuildId(self, symbols_dir, basename, build_id):
Christopher Ferris27bee5a2022-01-25 13:05:53 -0800338 for candidate in self.GlobSymbolsDir(symbols_dir).get(basename, []):
David Srbecky3a3349e2021-10-28 13:08:10 +0100339 info = self.GetLibraryInfo(candidate)
340 if info and info.build_id == build_id:
341 return "/" + str(candidate.relative_to(symbols_dir))
342 return None
343
Christopher Ferris1f2051d2021-09-10 14:41:00 -0700344 def GetLibPath(self, lib):
Christopher Ferris29fcc842022-11-02 17:33:30 -0700345 if lib in self.lib_to_path:
346 return self.lib_to_path[lib]
347
348 lib_path = self.FindLibPath(lib)
349 self.lib_to_path[lib] = lib_path
350 return lib_path
351
352 def FindLibPath(self, lib):
Christopher Ferris1f2051d2021-09-10 14:41:00 -0700353 symbol_dir = symbol.SYMBOLS_DIR
354 if os.path.isfile(symbol_dir + lib):
355 return lib
356
Christopher Ferris598cc362022-04-22 14:37:56 -0700357 # Try and rewrite any apex files if not found in symbols.
358 # For some reason, the directory in symbols does not match
359 # the path on system.
360 # The path is com.android.<directory> on device, but
361 # com.google.android.<directory> in symbols.
362 new_lib = lib.replace("/com.android.", "/com.google.android.")
363 if os.path.isfile(symbol_dir + new_lib):
364 return new_lib
365
Christopher Ferris1f2051d2021-09-10 14:41:00 -0700366 # When using atest, test paths are different between the out/ directory
367 # and device. Apply fixups.
368 if not lib.startswith("/data/local/tests/") and not lib.startswith("/data/local/tmp/"):
369 print("WARNING: Cannot find %s in symbol directory" % lib)
370 return lib
371
372 test_name = lib.rsplit("/", 1)[-1]
373 test_dir = "/data/nativetest"
374 test_dir_bitness = ""
375 if symbol.ARCH.endswith("64"):
376 bitness = "64"
377 test_dir_bitness = "64"
378 else:
379 bitness = "32"
380
381 # Unfortunately, the location of the real symbol file is not
382 # standardized, so we need to go hunting for it.
383
384 # This is in vendor, look for the value in:
385 # /data/nativetest{64}/vendor/test_name/test_name
386 if lib.startswith("/data/local/tests/vendor/"):
Christopher Ferris29fcc842022-11-02 17:33:30 -0700387 lib_path = os.path.join(test_dir + test_dir_bitness, "vendor", test_name, test_name)
388 if os.path.isfile(symbol_dir + lib_path):
389 return lib_path
Christopher Ferris1f2051d2021-09-10 14:41:00 -0700390
391 # Look for the path in:
392 # /data/nativetest{64}/test_name/test_name
393 lib_path = os.path.join(test_dir + test_dir_bitness, test_name, test_name)
394 if os.path.isfile(symbol_dir + lib_path):
395 return lib_path
396
397 # CtsXXX tests are in really non-standard locations try:
398 # /data/nativetest/{test_name}
399 lib_path = os.path.join(test_dir, test_name)
400 if os.path.isfile(symbol_dir + lib_path):
401 return lib_path
402 # Try:
403 # /data/nativetest/{test_name}{32|64}
404 lib_path += bitness
405 if os.path.isfile(symbol_dir + lib_path):
406 return lib_path
407
408 # Cannot find location, give up and return the original path
409 print("WARNING: Cannot find %s in symbol directory" % lib)
410 return lib
411
412
  def ProcessLine(self, line):
    """Classify one line of the crash dump and record or print it.

    Header lines are printed immediately. Backtrace frames are symbolized
    and appended to self.trace_lines, stack data lines to self.value_lines.
    Pending output is flushed whenever a header is seen or the frame number
    does not increase.

    Args:
      line: one line of the crash dump.

    Returns:
      True if the line matched a header, trace or value pattern, otherwise
      False.
    """
    ret = False
    process_header = self.process_info_line.search(line)
    signal_header = self.signal_line.search(line)
    abort_message_header = self.abort_message_line.search(line)
    thread_header = self.thread_line.search(line)
    register_header = self.register_line.search(line)
    revision_header = self.revision_line.search(line)
    dalvik_jni_thread_header = self.dalvik_jni_thread_line.search(line)
    dalvik_native_thread_header = self.dalvik_native_thread_line.search(line)
    unreachable_header = self.unreachable_line.search(line)
    if process_header or signal_header or abort_message_header or thread_header or \
        register_header or dalvik_jni_thread_header or dalvik_native_thread_header or \
        revision_header or unreachable_header:
      ret = True
      # A header starts a new section: flush what was collected so far.
      if self.trace_lines or self.value_lines:
        self.PrintOutput(self.trace_lines, self.value_lines)
        self.PrintDivider()
        self.trace_lines = []
        self.value_lines = []
        self.last_frame = -1
      if process_header:
        print(process_header.group(1))
      if signal_header:
        print(signal_header.group(1))
      if abort_message_header:
        print(abort_message_header.group(1))
      if register_header:
        print(register_header.group(1))
      if thread_header:
        print(thread_header.group(1))
      if dalvik_jni_thread_header:
        print(dalvik_jni_thread_header.group(1))
      if dalvik_native_thread_header:
        print(dalvik_native_thread_header.group(1))
      if revision_header:
        print(revision_header.group(1))
      if unreachable_header:
        print(unreachable_header.group(1))
      return True
    trace_line_dict = self.MatchTraceLine(line)
    if trace_line_dict is not None:
      ret = True
      frame = int(trace_line_dict["frame"])
      code_addr = trace_line_dict["offset"]
      area = trace_line_dict["dso"]
      so_offset = trace_line_dict["so_offset"]
      symbol_present = trace_line_dict["symbol_present"]
      symbol_name = trace_line_dict["symbol_name"]
      build_id = trace_line_dict["build_id"]

      # A frame number that does not increase means a new backtrace begins.
      if frame <= self.last_frame and (self.trace_lines or self.value_lines):
        self.PrintOutput(self.trace_lines, self.value_lines)
        self.PrintDivider()
        self.trace_lines = []
        self.value_lines = []
      self.last_frame = frame

      if area == "<unknown>" or area == "[heap]" or area == "[stack]":
        self.trace_lines.append((code_addr, "", area))
      else:
        # If this is an apk, it usually means that there is actually
        # a shared so that was loaded directly out of it. In that case,
        # extract the shared library and the name of the shared library.
        lib = None
        # The format of the map name:
        #   Some.apk!libshared.so
        # or
        #   Some.apk
        if so_offset:
          # If it ends in apk, we are done.
          apk = None
          if area.endswith(".apk"):
            apk = area
          else:
            index = area.rfind(".so!")
            if index != -1:
              # Sometimes we'll see something like:
              #   #01 pc abcd libart.so!libart.so (offset 0x134000)
              # Remove everything after the ! and zero the offset value.
              area = area[0:index + 3]
              so_offset = 0
            else:
              index = area.rfind(".apk!")
              if index != -1:
                apk = area[0:index + 4]
          if apk:
            lib_name, lib = self.GetLibFromApk(apk, so_offset)
        else:
          # Sometimes we'll see something like:
          #   #01 pc abcd libart.so!libart.so
          # Remove everything after the !.
          index = area.rfind(".so!")
          if index != -1:
            area = area[0:index + 3]
        # No library was extracted from an apk: symbolize the mapped file.
        if not lib:
          lib = area
          lib_name = None

        if build_id:
          # If we have the build_id, do a brute-force search of the symbols directory.
          basename = os.path.basename(lib)
          lib = self.GetLibraryByBuildId(symbol.SYMBOLS_DIR, basename, build_id)
          if not lib:
            print("WARNING: Cannot find {} with build id {} in symbols directory."
                  .format(basename, build_id))
        else:
          # When using atest, test paths are different between the out/ directory
          # and device. Apply fixups.
          lib = self.GetLibPath(lib)

        # If a calls b which further calls c and c is inlined to b, we want to
        # display "a -> b -> c" in the stack trace instead of just "a -> c"
        info = symbol.SymbolInformation(lib, code_addr)
        nest_count = len(info) - 1
        for (source_symbol, source_location, symbol_with_offset) in info:
          if not source_symbol:
            if symbol_present:
              source_symbol = symbol.CallCppFilt(symbol_name)
            else:
              source_symbol = "<unknown>"
          if not symbol.VERBOSE:
            source_symbol = symbol.FormatSymbolWithoutParameters(source_symbol)
            symbol_with_offset = symbol.FormatSymbolWithoutParameters(symbol_with_offset)
          if not source_location:
            source_location = area
            if lib_name:
              source_location += "(" + lib_name + ")"
          if nest_count > 0:
            # Every entry but the last gets an arrow in place of the address.
            nest_count = nest_count - 1
            arrow = "v------>"
            if symbol.ARCH == "arm64" or symbol.ARCH == "x86_64" or symbol.ARCH == "riscv64":
              arrow = "v-------------->"
            self.trace_lines.append((arrow, source_symbol, source_location))
          else:
            if not symbol_with_offset:
              symbol_with_offset = source_symbol
            self.trace_lines.append((code_addr, symbol_with_offset, source_location))
    if self.code_line.match(line):
      # Code lines should be ignored. If this were excluded the 'code around'
      # sections would trigger value_line matches.
      return ret
    if self.value_line.match(line):
      ret = True
      match = self.value_line.match(line)
      (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
      if area == "<unknown>" or area == "[heap]" or area == "[stack]" or not area:
        self.value_lines.append((addr, value, "", area))
      else:
        info = symbol.SymbolInformation(area, value)
        # Only the last entry returned for the value is used.
        (source_symbol, source_location, object_symbol_with_offset) = info.pop()
        # If there is no information, skip this.
        if source_symbol or source_location or object_symbol_with_offset:
          if not source_symbol:
            if symbol_present:
              source_symbol = symbol.CallCppFilt(symbol_name)
            else:
              source_symbol = "<unknown>"
          if not source_location:
            source_location = area
          if not object_symbol_with_offset:
            object_symbol_with_offset = source_symbol
          self.value_lines.append((addr,
                                   value,
                                   object_symbol_with_offset,
                                   source_location))

    return ret
Elliott Hughesa9e34172014-07-01 14:56:22 -0700581
582
class RegisterPatternTests(unittest.TestCase):
  """Checks TraceConverter.register_line against a hand-written pattern per ABI."""

  def assert_register_matches(self, abi, example_crash, stupid_pattern):
    """Feed an example crash through a converter and compare register matching.

    Args:
      abi: ABI name passed by the caller; this helper does not read it.
      example_crash: Full crash dump text, split on newlines before processing.
      stupid_pattern: Reference regex; a line is expected to be a register
        line exactly when this pattern is found in it.
    """
    converter = TraceConverter()
    crash_lines = example_crash.split('\n')
    # The ABI is derived from the dump itself before the regexes are rebuilt.
    symbol.SetAbi(crash_lines)
    converter.UpdateAbiRegexes()
    for crash_line in crash_lines:
      converter.ProcessLine(crash_line)
      is_register = re.search(stupid_pattern, crash_line) is not None
      matched = converter.register_line.search(crash_line) is not None
      # The line is passed as the failure message to show which input disagreed.
      self.assertEqual(matched, is_register, crash_line)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)

  def test_arm_registers(self):
    pattern = '\\b(r0|r4|r8|ip)\\b'
    self.assert_register_matches("arm", example_crashes.arm, pattern)

  def test_arm64_registers(self):
    pattern = '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b'
    self.assert_register_matches("arm64", example_crashes.arm64, pattern)

  def test_mips_registers(self):
    pattern = '\\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\\b'
    self.assert_register_matches("mips", example_crashes.mips, pattern)

  def test_x86_registers(self):
    pattern = '\\b(eax|esi|xcs|eip)\\b'
    self.assert_register_matches("x86", example_crashes.x86, pattern)

  def test_x86_64_registers(self):
    pattern = '\\b(rax|rsi|r8|r12|cs|rip)\\b'
    self.assert_register_matches("x86_64", example_crashes.x86_64, pattern)

  def test_riscv64_registers(self):
    pattern = '\\b(gp|t2|t6|s3|s7|s11|a3|a7|sp)\\b'
    self.assert_register_matches("riscv64", example_crashes.riscv64, pattern)

613
class LibmemunreachablePatternTests(unittest.TestCase):
  """Counts header and trace lines recognised in the libmemunreachable example."""

  def test_libmemunreachable(self):
    converter = TraceConverter()
    report_lines = example_crashes.libmemunreachable.split('\n')

    symbol.SetAbi(report_lines)
    self.assertEqual(symbol.ARCH, "arm")

    converter.UpdateAbiRegexes()
    header_count = 0
    trace_count = 0
    for report_line in report_lines:
      converter.ProcessLine(report_line)
      # A single line is tested against both matchers independently.
      found_header = re.search(converter.unreachable_line, report_line)
      if found_header is not None:
        header_count += 1
      found_trace = converter.MatchTraceLine(report_line)
      if found_trace is not None:
        trace_count += 1
    self.assertEqual(header_count, 3)
    self.assertEqual(trace_count, 2)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)

Elliott Hughesa9e34172014-07-01 14:56:22 -0700634
class LongASANStackTests(unittest.TestCase):
  """Regression test for long ASAN-style stacks (frame numbers are not padded).

  Such a trace used to be split in two once the frame number reached two
  digits, because the frame number was handled as a string rather than
  converted to an integer.
  """

  def test_long_asan_crash(self):
    converter = TraceConverter()
    crash_lines = example_crashes.long_asan_crash.splitlines()
    symbol.SetAbi(crash_lines)
    converter.UpdateAbiRegexes()
    # The number of stored trace lines must never shrink while processing.
    # A split stack would print a separator and flush trace_lines, which
    # would show up here as a decrease.
    previous_count = 0
    for crash_line in crash_lines:
      converter.ProcessLine(crash_line)
      current_count = len(converter.trace_lines)
      self.assertLessEqual(previous_count, current_count)
      previous_count = current_count
    # The old split occurred going from frame #9 to #10, so more than ten
    # frames must have been parsed and kept.
    self.assertGreater(previous_count, 10)
    converter.PrintOutput(converter.trace_lines, converter.value_lines)

655
class ValueLinesTest(unittest.TestCase):
  """Checks that a value line yielding no symbol information is not recorded."""

  def test_value_line_skipped(self):
    converter = TraceConverter()
    abi_header = ["ABI: 'arm'"]
    symbol.SetAbi(abi_header)
    converter.UpdateAbiRegexes()
    sample_line = " 12345678 00001000 ."
    converter.ProcessLine(sample_line)
    self.assertEqual([], converter.value_lines)

663
# Run the unit tests above, with verbose reporting, when executed as a script.
if __name__ == '__main__':
  unittest.main(verbosity=2)