| #!/usr/bin/env python |
| # |
| # Copyright (C) 2013 The Android Open Source Project |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """stack symbolizes native crash dumps.""" |
| |
| import os |
| import re |
| import subprocess |
| import symbol |
| import tempfile |
| import unittest |
| |
| import example_crashes |
| |
def ConvertTrace(lines):
    """Symbolize the crash dump in `lines` and print the result to stdout.

    Announces the symbols directory taken from symbol.SYMBOLS_DIR, then hands
    the lines to a fresh TraceConverter.
    """
    converter = TraceConverter()
    # Single-argument print(...) behaves the same as the print statement.
    print("Reading symbols from %s" % (symbol.SYMBOLS_DIR,))
    converter.ConvertTrace(lines)
| |
class TraceConverter:
    """Turns native crash dump text into symbolized stack and stack-data tables.

    Lines are fed one at a time to ProcessLine(), which echoes recognized
    header lines and accumulates symbolized frames in `trace_lines` and stack
    values in `value_lines`. PrintOutput() renders both as tables on stdout.

    The ABI-dependent patterns (register, trace, sanitizer trace, value and
    code lines) start out as placeholders and only become useful once
    UpdateAbiRegexes() has been called with symbol.ARCH set.
    """

    # Header lines; group(1) of a match is echoed verbatim by ProcessLine().
    process_info_line = re.compile(r"(pid: [0-9]+, tid: [0-9]+.*)")
    revision_line = re.compile(r"(Revision: '(.*)')")
    signal_line = re.compile(r"(signal [0-9]+ \(.*\).*)")
    abort_message_line = re.compile(r"(Abort message: '.*')")
    thread_line = re.compile(r"(.*)(\-\-\- ){15}\-\-\-")
    dalvik_jni_thread_line = re.compile(r'(".*" prio=[0-9]+ tid=[0-9]+ NATIVE.*)')
    dalvik_native_thread_line = re.compile(r'(".*" sysTid=[0-9]+ nice=[0-9]+.*)')
    # Placeholder patterns, replaced by UpdateAbiRegexes().
    register_line = re.compile("$a")
    trace_line = re.compile("$a")
    sanitizer_trace_line = re.compile("$a")
    value_line = re.compile("$a")
    code_line = re.compile("$a")
    # Patterns used to parse the output of `zipinfo -v`.
    zipinfo_central_directory_line = re.compile(r"Central\s+directory\s+entry")
    zipinfo_central_info_match = re.compile(
        r"^\s*(\S+)$\s*offset of local header from start of archive:\s*(\d+)"
        r".*^\s*compressed size:\s+(\d+)", re.M | re.S)
    unreachable_line = re.compile(
        r"((\d+ bytes in \d+ unreachable allocations)|"
        r"(\d+ bytes unreachable at [0-9a-f]+)|"
        r"(referencing \d+ unreachable bytes in \d+ allocation(s)?)|"
        r"(and \d+ similar unreachable bytes in \d+ allocation(s)?))")
    # Immutable defaults; UpdateAbiRegexes() overrides width/spacing per ABI.
    last_frame = -1
    width = "{8}"
    spacing = ""

    register_names = {
        "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr",
        "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate",
        "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
        "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
        "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags",
        "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags",
    }

    def __init__(self):
        # Mutable state lives on the instance. As class attributes these
        # containers were shared, so frames appended by one converter (and
        # cached apk temp files) leaked into every other converter.
        self.trace_lines = []   # (addr, symbol_with_offset, location) tuples
        self.value_lines = []   # (addr, value, symbol_with_offset, location)
        # apk name -> [apk_full_path, offset_list, tmp_files]
        self.apk_info = {}

    def _Is64BitAbi(self):
        """Return True when symbol.ARCH names one of the 64-bit ABIs."""
        return symbol.ARCH in ("arm64", "mips64", "x86_64")

    def UpdateAbiRegexes(self):
        """Rebuild the ABI-dependent patterns for the current symbol.ARCH.

        Raises:
            KeyError: if symbol.ARCH is not a key of register_names.
        """
        if self._Is64BitAbi():
            self.width = "{16}"
            self.spacing = " "
        else:
            self.width = "{8}"
            self.spacing = ""

        self.register_line = re.compile(
            r"(([ ]*\b(" + self.register_names[symbol.ARCH] +
            r")\b +[0-9a-f]" + self.width + r"){2,5})")

        # Note that both trace and value line matching allow for variable
        # amounts of whitespace (e.g. \t). This is because we want to allow
        # the stack tool to operate on AndroidFeedback provided system logs.
        # AndroidFeedback strips out double spaces that are found in tombstone
        # files and logcat output.
        #
        # Examples of matched trace lines include lines from tombstone files
        # like:
        #   #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
        #
        # Or lines from AndroidFeedback crash report system logs like:
        #   03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
        # Please note the spacing differences.
        self.trace_line = re.compile(
            r".*"                                   # Random start stuff.
            r"\#(?P<frame>[0-9]+)"                  # Frame number.
            r"[ \t]+..[ \t]+"                       # (space)pc(space).
            r"(?P<offset>[0-9a-f]" + self.width +   # Offset (hex number given
            r")[ \t]+"                              # without 0x prefix).
            r"(?P<dso>\[[^\]]+\]|[^\r\n \t]*)"      # Library name.
            # Offset into the file to find the start of the shared so.
            r"( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?"
            r"(?P<symbolpresent> \((?P<symbol>.*)\))?")  # Is the symbol there?

        # Sanitizer output. This is different from debuggerd output, and it is
        # easier to handle this as its own regex. Example:
        #   08-19 05:29:26.283 397 403 I : #0 0xb6a15237 (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
        self.sanitizer_trace_line = re.compile(
            r".*"                        # Random start stuff.
            r"\#(?P<frame>[0-9]+)"       # Frame number.
            r"[ \t]+0x[0-9a-f]+[ \t]+"   # PC, not interesting to us.
            r"\("                        # Opening paren.
            r"(?P<dso>[^+]+)"            # Library name.
            r"\+"                        # '+'
            r"0x(?P<offset>[0-9a-f]+)"   # Offset (hex number with 0x prefix).
            r"\)")                       # Closing paren.

        # Examples of matched value lines include:
        #   bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
        #   bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so (symbol)
        #   03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
        # Again, note the spacing differences.
        self.value_line = re.compile(
            r"(.*)([0-9a-f]" + self.width + r")[ \t]+([0-9a-f]" + self.width +
            r")[ \t]+([^\r\n \t]*)( \((.*)\))?")

        # Lines from 'code around' sections of the output will be matched
        # before value lines because otherwise the 'code around' sections will
        # be confused as value lines.
        #
        # Examples include:
        #   801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
        #   03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
        self.code_line = re.compile(
            r"(.*)" + (r"[ \t]*[a-f0-9]" + self.width) * 5 + r"[ \t]*[ \r\n]")

    def CleanLine(self, ln):
        """Return `ln` as text with every non-ASCII character dropped.

        AndroidFeedback adds zero width spaces into its crash reports. These
        should be removed or the regular expressions will fail to match.
        Accepts both byte strings and text strings.
        """
        if isinstance(ln, bytes):
            return ln.decode("ascii", "ignore")
        return ln.encode("ascii", "ignore").decode("ascii")

    def PrintTraceLines(self, trace_lines):
        """Print `trace_lines` as the "Stack Trace" table.

        Args:
            trace_lines: sequence of (addr, symbol_with_offset, location).
        """
        if not trace_lines:
            return
        maxlen = max(len(tl[1]) for tl in trace_lines)
        print("")
        print("Stack Trace:")
        print(" RELADDR " + self.spacing + "FUNCTION".ljust(maxlen) + " FILE:LINE")
        # Iterate the argument itself so the column width computed above and
        # the printed rows always come from the same data.
        for (addr, symbol_with_offset, location) in trace_lines:
            print(" %8s %s %s" % (addr, symbol_with_offset.ljust(maxlen), location))

    def PrintValueLines(self, value_lines):
        """Print `value_lines` as the "Stack Data" table.

        Args:
            value_lines: sequence of (addr, value, symbol_with_offset, location).
        """
        if not value_lines:
            return
        maxlen = max(len(vl[2]) for vl in value_lines)
        print("")
        print("Stack Data:")
        print(" ADDR " + self.spacing + "VALUE " + "FUNCTION".ljust(maxlen) + " FILE:LINE")
        for (addr, value, symbol_with_offset, location) in value_lines:
            print(" %8s %8s %s %s" % (addr, value, symbol_with_offset.ljust(maxlen), location))

    def PrintOutput(self, trace_lines, value_lines):
        """Print the trace table and then the value table, skipping empty ones."""
        if trace_lines:
            self.PrintTraceLines(trace_lines)
        if value_lines:
            self.PrintValueLines(value_lines)

    def PrintDivider(self):
        """Print the separator shown between two independent dumps."""
        print("")
        print("-----------------------------------------------------\n")

    def DeleteApkTmpFiles(self):
        """Delete every temporary library extracted from an apk.

        Each path is forgotten as it is removed, so calling this twice is safe
        and GetLibFromApk() re-extracts a library instead of handing out a
        path that no longer exists.
        """
        for _, _, tmp_files in self.apk_info.values():
            while tmp_files:
                _, tmp_file = tmp_files.popitem()
                os.unlink(tmp_file)

    def ConvertTrace(self, lines):
        """Symbolize all of `lines` and print the result.

        Sets the ABI from the lines when symbol.ARCH is not set yet. Temporary
        files created along the way are removed even if processing fails.
        """
        lines = [self.CleanLine(ln) for ln in lines]
        try:
            if not symbol.ARCH:
                symbol.SetAbi(lines)
            self.UpdateAbiRegexes()
            for line in lines:
                self.ProcessLine(line)
            self.PrintOutput(self.trace_lines, self.value_lines)
        finally:
            # Delete any temporary files created while processing the lines.
            self.DeleteApkTmpFiles()

    def MatchTraceLine(self, line):
        """Parse a stack frame line.

        Returns:
            A dict with the keys "frame", "offset", "so_offset", "dso",
            "symbol_present" and "symbol_name", or None when `line` matches
            neither the trace pattern nor the sanitizer trace pattern.
        """
        match = self.trace_line.match(line)
        if match:
            return {"frame": match.group("frame"),
                    "offset": match.group("offset"),
                    "so_offset": match.group("so_offset"),
                    "dso": match.group("dso"),
                    "symbol_present": bool(match.group("symbolpresent")),
                    "symbol_name": match.group("symbol")}
        match = self.sanitizer_trace_line.match(line)
        if match:
            return {"frame": match.group("frame"),
                    "offset": match.group("offset"),
                    "so_offset": None,
                    "dso": match.group("dso"),
                    "symbol_present": False,
                    "symbol_name": None}
        return None

    def ExtractLibFromApk(self, apk, shared_lib_name):
        """Extract `shared_lib_name` from `apk` into a temporary file.

        Returns:
            The path of the temporary file, or None when `unzip` exits with a
            non-zero status (the temporary file is removed in that case).
        """
        tmp_fd, tmp_file = tempfile.mkstemp()
        extracted = False
        try:
            status = subprocess.call(["unzip", "-p", apk, shared_lib_name],
                                     stdout=tmp_fd)
            extracted = (status == 0)
        finally:
            os.close(tmp_fd)
            if not extracted:
                os.unlink(tmp_file)
        if extracted:
            return tmp_file
        return None

    def ProcessCentralInfo(self, offset_list, central_info):
        """Parse one central directory entry of `zipinfo -v` output.

        Appends [name, start, end] to `offset_list`, where end is the local
        header offset plus the compressed size.

        Returns:
            The tuple (name, start, end).

        Raises:
            Exception: if `central_info` lacks any of the expected fields.
        """
        match = self.zipinfo_central_info_match.search(central_info)
        if not match:
            raise Exception("Cannot find all info from zipinfo\n" + central_info)
        name = match.group(1)
        start = int(match.group(2))
        end = start + int(match.group(3))

        offset_list.append([name, start, end])
        return name, start, end

    def GetLibFromApk(self, apk, offset):
        """Find and extract the apk entry that covers `offset`.

        Args:
            apk: apk path as it appears in the trace.
            offset: hexadecimal string giving the offset inside the apk.

        Returns:
            (entry name, path of the extracted temporary file), or
            (None, None) when the entry cannot be found or extracted.
        """
        # Convert the hex string to an integer.
        offset = int(offset, 16)

        # Check if we already have information about this apk.
        if apk in self.apk_info:
            apk_full_path, offset_list, tmp_files = self.apk_info[apk]
            for file_name, start, end in offset_list:
                if start <= offset < end:
                    if file_name in tmp_files:
                        return file_name, tmp_files[file_name]
                    tmp_file = self.ExtractLibFromApk(apk_full_path, file_name)
                    if tmp_file:
                        tmp_files[file_name] = tmp_file
                        return file_name, tmp_file
                    break
            return None, None

        if "ANDROID_PRODUCT_OUT" not in os.environ:
            print("ANDROID_PRODUCT_OUT environment variable not set.")
            return None, None
        out_dir = os.environ["ANDROID_PRODUCT_OUT"]
        if not os.path.exists(out_dir):
            print("ANDROID_PRODUCT_OUT " + out_dir + " does not exist.")
            return None, None
        if apk.startswith("/"):
            apk_full_path = out_dir + apk
        else:
            apk_full_path = os.path.join(out_dir, apk)
        if not os.path.exists(apk_full_path):
            print("Cannot find apk " + apk)
            return None, None

        # universal_newlines=True makes stdout yield text lines, which is what
        # the str patterns above need.
        cmd = subprocess.Popen(["zipinfo", "-v", apk_full_path],
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
        file_name = None
        offset_list = []
        try:
            # Find the first central info marker.
            for line in cmd.stdout:
                if self.zipinfo_central_directory_line.search(line):
                    break

            # Every following marker ends the entry collected so far.
            central_info = ""
            for line in cmd.stdout:
                if self.zipinfo_central_directory_line.search(line):
                    cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
                    if not file_name and start <= offset < end:
                        file_name = cur_name
                    central_info = ""
                else:
                    central_info += line
            if central_info:
                cur_name, start, end = self.ProcessCentralInfo(offset_list, central_info)
                if not file_name and start <= offset < end:
                    file_name = cur_name
        finally:
            # Always release the pipe and reap the child process.
            cmd.stdout.close()
            cmd.wait()

        # Save the information from the zip.
        tmp_files = dict()
        self.apk_info[apk] = [apk_full_path, offset_list, tmp_files]
        if not file_name:
            return None, None
        tmp_shared_lib = self.ExtractLibFromApk(apk_full_path, file_name)
        if tmp_shared_lib:
            tmp_files[file_name] = tmp_shared_lib
            return file_name, tmp_shared_lib
        return None, None

    def _FlushPending(self):
        """Print the accumulated tables plus a divider, then start afresh."""
        self.PrintOutput(self.trace_lines, self.value_lines)
        self.PrintDivider()
        self.trace_lines = []
        self.value_lines = []

    def _AddTraceLine(self, trace_line_dict):
        """Symbolize one parsed frame and append it to trace_lines."""
        frame = int(trace_line_dict["frame"])
        code_addr = trace_line_dict["offset"]
        area = trace_line_dict["dso"]
        so_offset = trace_line_dict["so_offset"]
        symbol_present = trace_line_dict["symbol_present"]
        symbol_name = trace_line_dict["symbol_name"]

        # A frame number that does not increase starts a new stack trace.
        if frame <= self.last_frame and (self.trace_lines or self.value_lines):
            self._FlushPending()
        self.last_frame = frame

        if area in ("<unknown>", "[heap]", "[stack]"):
            self.trace_lines.append((code_addr, "", area))
            return

        # If this is an apk, it usually means that there is actually a shared
        # so that was loaded directly out of it. In that case, extract the
        # shared library and the name of the shared library.
        lib = None
        lib_name = None
        if area.endswith(".apk") and so_offset:
            lib_name, lib = self.GetLibFromApk(area, so_offset)
        if not lib:
            lib = area
            lib_name = None

        # If a calls b which further calls c and c is inlined to b, we want to
        # display "a -> b -> c" in the stack trace instead of just "a -> c".
        info = symbol.SymbolInformation(lib, code_addr)
        nest_count = len(info) - 1
        for (source_symbol, source_location, object_symbol_with_offset) in info:
            if not source_symbol:
                if symbol_present:
                    source_symbol = symbol.CallCppFilt(symbol_name)
                else:
                    source_symbol = "<unknown>"
            if not source_location:
                source_location = area
                if lib_name:
                    source_location += "(" + lib_name + ")"
            if nest_count > 0:
                # All entries but the last get an arrow instead of an address.
                nest_count = nest_count - 1
                arrow = "v------>"
                if self._Is64BitAbi():
                    arrow = "v-------------->"
                self.trace_lines.append((arrow, source_symbol, source_location))
            else:
                if not object_symbol_with_offset:
                    object_symbol_with_offset = source_symbol
                self.trace_lines.append((code_addr,
                                         object_symbol_with_offset,
                                         source_location))

    def _AddValueLine(self, match):
        """Symbolize one matched stack value line and append it to value_lines."""
        (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
        if area in ("<unknown>", "[heap]", "[stack]") or not area:
            self.value_lines.append((addr, value, "", area))
            return

        info = symbol.SymbolInformation(area, value)
        if not info:
            # Nothing to pop; treat it like "no information".
            return
        (source_symbol, source_location, object_symbol_with_offset) = info.pop()
        # If there is no information, skip this.
        if not (source_symbol or source_location or object_symbol_with_offset):
            return
        if not source_symbol:
            if symbol_present:
                source_symbol = symbol.CallCppFilt(symbol_name)
            else:
                source_symbol = "<unknown>"
        if not source_location:
            source_location = area
        if not object_symbol_with_offset:
            object_symbol_with_offset = source_symbol
        self.value_lines.append((addr,
                                 value,
                                 object_symbol_with_offset,
                                 source_location))

    def ProcessLine(self, line):
        """Consume one line of the dump.

        Header lines are echoed immediately, after flushing any tables that
        are still pending. Stack frame lines and stack value lines are
        symbolized and stored for a later PrintOutput().

        Returns:
            True if the line was recognized as a header, frame or value line,
            otherwise False.
        """
        # Listed in the order in which matching headers are printed.
        header_patterns = (
            self.process_info_line,
            self.signal_line,
            self.abort_message_line,
            self.register_line,
            self.thread_line,
            self.dalvik_jni_thread_line,
            self.dalvik_native_thread_line,
            self.revision_line,
            self.unreachable_line,
        )
        headers = [found
                   for found in (pattern.search(line) for pattern in header_patterns)
                   if found]
        if headers:
            if self.trace_lines or self.value_lines:
                self._FlushPending()
                self.last_frame = -1
            for header in headers:
                print(header.group(1))
            return True

        ret = False
        trace_line_dict = self.MatchTraceLine(line)
        if trace_line_dict is not None:
            ret = True
            self._AddTraceLine(trace_line_dict)

        if self.code_line.match(line):
            # Code lines should be ignored. If this were excluded the 'code
            # around' sections would trigger value_line matches.
            return ret

        value_match = self.value_line.match(line)
        if value_match:
            ret = True
            self._AddValueLine(value_match)

        return ret
| |
| |
class RegisterPatternTests(unittest.TestCase):
    """Checks which lines of each example crash the register pattern matches."""

    def assert_register_matches(self, abi, example_crash, stupid_pattern):
        """Assert register_line matches exactly the lines `stupid_pattern` does.

        Args:
            abi: ABI label of the example; not used by the check itself, the
                ABI is configured by passing the crash lines to
                symbol.SetAbi().
            example_crash: full text of an example crash dump.
            stupid_pattern: simple regex that finds the register lines of
                that dump.
        """
        tc = TraceConverter()
        lines = example_crash.split('\n')
        symbol.SetAbi(lines)
        tc.UpdateAbiRegexes()
        for line in lines:
            tc.ProcessLine(line)
            is_register = (re.search(stupid_pattern, line) is not None)
            matched = (tc.register_line.search(line) is not None)
            # assertEqual: assertEquals is a deprecated alias.
            self.assertEqual(matched, is_register, line)
        tc.PrintOutput(tc.trace_lines, tc.value_lines)

    def test_arm_registers(self):
        self.assert_register_matches("arm", example_crashes.arm, '\\b(r0|r4|r8|ip)\\b')

    def test_arm64_registers(self):
        self.assert_register_matches("arm64", example_crashes.arm64, '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b')

    def test_mips_registers(self):
        self.assert_register_matches("mips", example_crashes.mips, '\\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\\b')

    def test_mips64_registers(self):
        self.assert_register_matches("mips64", example_crashes.mips64, '\\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\\b')

    def test_x86_registers(self):
        self.assert_register_matches("x86", example_crashes.x86, '\\b(eax|esi|xcs|eip)\\b')

    def test_x86_64_registers(self):
        self.assert_register_matches("x86_64", example_crashes.x86_64, '\\b(rax|rsi|r8|r12|cs|rip)\\b')
| |
class LibmemunreachablePatternTests(unittest.TestCase):
    """Checks header and frame detection on the libmemunreachable example."""

    def test_libmemunreachable(self):
        """The example must yield 3 unreachable headers and 2 trace lines."""
        tc = TraceConverter()
        lines = example_crashes.libmemunreachable.split('\n')

        symbol.SetAbi(lines)
        # assertEqual: assertEquals is a deprecated alias.
        self.assertEqual(symbol.ARCH, "arm")

        tc.UpdateAbiRegexes()
        header_lines = 0
        trace_lines = 0
        for line in lines:
            tc.ProcessLine(line)
            if tc.unreachable_line.search(line) is not None:
                header_lines += 1
            if tc.MatchTraceLine(line) is not None:
                trace_lines += 1
        self.assertEqual(header_lines, 3)
        self.assertEqual(trace_lines, 2)
        tc.PrintOutput(tc.trace_lines, tc.value_lines)
| |
class LongASANStackTests(unittest.TestCase):
    """Guards against splitting a long ASAN-style stack trace in two.

    ASAN frame numbers are not zero padded. When frame numbers were compared
    as strings instead of integers, the trace was split at the point where
    the frame number grew to two digits.
    """

    def test_long_asan_crash(self):
        converter = TraceConverter()
        crash_lines = example_crashes.long_asan_crash.splitlines()
        symbol.SetAbi(crash_lines)
        converter.UpdateAbiRegexes()
        # The number of stored trace lines must never shrink: a split prints a
        # separator and flushes trace_lines.
        previous_count = 0
        for crash_line in crash_lines:
            converter.ProcessLine(crash_line)
            current_count = len(converter.trace_lines)
            self.assertLessEqual(previous_count, current_count)
            previous_count = current_count
        # The split used to happen going from frame #9 to #10, so make sure
        # more than ten frames were parsed and stored.
        self.assertGreater(previous_count, 10)
        converter.PrintOutput(converter.trace_lines, converter.value_lines)
| |
class ValueLinesTest(unittest.TestCase):
    """Covers value lines that must not be recorded."""

    def test_value_line_skipped(self):
        """This arm value line must leave value_lines empty."""
        converter = TraceConverter()
        symbol.SetAbi(["ABI: 'arm'"])
        converter.UpdateAbiRegexes()
        converter.ProcessLine(" 12345678 00001000 .")
        recorded = converter.value_lines
        self.assertEqual([], recorded)
| |
# Run the unit tests defined in this file when it is executed as a script.
if __name__ == '__main__':
    unittest.main()