blob: 879ce388662b367ea1ca8611bfab92043c81b04c [file] [log] [blame]
Ben Chengb42dad02013-04-25 15:14:04 -07001#!/usr/bin/env python
2#
3# Copyright (C) 2013 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9# http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""stack symbolizes native crash dumps."""
18
19import re
Ben Chengb42dad02013-04-25 15:14:04 -070020import symbol
Elliott Hughesa9e34172014-07-01 14:56:22 -070021import unittest
Ben Chengb42dad02013-04-25 15:14:04 -070022
Elliott Hughesc3166be2014-07-07 15:06:28 -070023import example_crashes
24
def ConvertTrace(lines):
    """Symbolize a crash dump given as an iterable of text lines.

    Creates a new TraceConverter, reports which symbols directory is in use
    (symbol.SYMBOLS_DIR), then hands every line to the converter, which
    prints the symbolized result to stdout.
    """
    converter = TraceConverter()
    # A single pre-formatted argument produces the same output as the
    # two-operand print statement while remaining valid on newer interpreters.
    print("Reading symbols from %s" % (symbol.SYMBOLS_DIR,))
    converter.ConvertTrace(lines)
29
class TraceConverter:
    """Converts native crash dump text into symbolized stack output.

    Lines are fed in one at a time through ProcessLine(). Header lines (pid,
    signal, abort message, registers, thread separators, ABI, revision) are
    echoed to stdout; backtrace frames and stack data words are symbolized via
    the `symbol` module and buffered in `trace_lines` / `value_lines` until the
    next header (or a frame-number restart) flushes them with PrintOutput().

    The regexes that depend on pointer width are rebuilt from symbol.ARCH by
    UpdateAbiRegexes(), which is also re-run whenever an "ABI: '...'" header
    is seen in the input.
    """

    # ABI-independent header patterns; group(1) is the text that gets echoed.
    process_info_line = re.compile(r"(pid: [0-9]+, tid: [0-9]+.*)")
    abi_line = re.compile(r"(ABI: '(.*)')")
    revision_line = re.compile(r"(Revision: '(.*)')")
    signal_line = re.compile(r"(signal [0-9]+ \(.*\).*)")
    abort_message_line = re.compile(r"(Abort message: '.*')")
    thread_line = re.compile(r"(.*)(\-\-\- ){15}\-\-\-")
    dalvik_jni_thread_line = re.compile(r'(".*" prio=[0-9]+ tid=[0-9]+ NATIVE.*)')
    dalvik_native_thread_line = re.compile(r'(".*" sysTid=[0-9]+ nice=[0-9]+.*)')

    # ABI-dependent patterns. "$a" can never match; these placeholders are
    # replaced with real patterns by UpdateAbiRegexes().
    register_line = re.compile("$a")
    trace_line = re.compile("$a")
    sanitizer_trace_line = re.compile("$a")
    value_line = re.compile("$a")
    code_line = re.compile("$a")

    # Class-level defaults. __init__ gives every instance its own lists so that
    # appending to them can never leak state between converters.
    trace_lines = []
    value_lines = []
    last_frame = -1
    width = "{8}"
    # NOTE(review): runs of spaces inside `spacing` and the header/row format
    # literals below may have been collapsed by extraction -- confirm the
    # column alignment against the upstream file.
    spacing = ""

    # Architectures whose addresses are printed as 16 hex digits.
    _64BIT_ARCHES = ("arm64", "mips64", "x86_64")

    # Mapping names that are recorded as-is instead of being symbolized.
    _UNSYMBOLIZED_AREAS = ("<unknown>", "[heap]", "[stack]")

    register_names = {
        "arm": "r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|sl|fp|ip|sp|lr|pc|cpsr",
        "arm64": "x0|x1|x2|x3|x4|x5|x6|x7|x8|x9|x10|x11|x12|x13|x14|x15|x16|x17|x18|x19|x20|x21|x22|x23|x24|x25|x26|x27|x28|x29|x30|sp|pc|pstate",
        "mips": "zr|at|v0|v1|a0|a1|a2|a3|t0|t1|t2|t3|t4|t5|t6|t7|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
        "mips64": "zr|at|v0|v1|a0|a1|a2|a3|a4|a5|a6|a7|t0|t1|t2|t3|s0|s1|s2|s3|s4|s5|s6|s7|t8|t9|k0|k1|gp|sp|s8|ra|hi|lo|bva|epc",
        "x86": "eax|ebx|ecx|edx|esi|edi|x?cs|x?ds|x?es|x?fs|x?ss|eip|ebp|esp|flags",
        "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags",
    }

    def __init__(self):
        # Per-instance buffers; mutating the class-level lists would share
        # pending output between every TraceConverter.
        self.trace_lines = []
        self.value_lines = []
        self.last_frame = -1
        self.UpdateAbiRegexes()

    def UpdateAbiRegexes(self):
        """Rebuild the width-dependent regexes for the current symbol.ARCH."""
        self._CompileAbiRegexes(symbol.ARCH)

    def _CompileAbiRegexes(self, arch):
        """Compile the register/trace/value/code regexes for `arch`.

        Args:
          arch: a key of `register_names`.

        Raises:
          KeyError: if `arch` has no entry in `register_names`.
        """
        if arch in self._64BIT_ARCHES:
            self.width = "{16}"
            self.spacing = " "
        else:
            self.width = "{8}"
            self.spacing = ""

        # Between two and five "<register> <hex value>" pairs on one line.
        self.register_line = re.compile(
            r"(([ ]*\b(" + self.register_names[arch] + r")\b +[0-9a-f]"
            + self.width + r"){2,5})")

        # Note that both trace and value line matching allow for variable
        # amounts of whitespace (e.g. \t). This is because we want to allow
        # the stack tool to operate on AndroidFeedback provided system logs.
        # AndroidFeedback strips out double spaces that are found in tombstone
        # files and logcat output.
        #
        # Examples of matched trace lines include lines from tombstone files
        # like:
        #   #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
        #
        # Or lines from AndroidFeedback crash report system logs like:
        #   03-25 00:51:05.520 I/DEBUG ( 65): #00 pc 001cf42e /data/data/com.my.project/lib/libmyproject.so
        # Please note the spacing differences.
        self.trace_line = re.compile(
            r".*"                                        # Random start stuff.
            r"\#(?P<frame>[0-9]+)"                       # Frame number.
            r"[ \t]+..[ \t]+"                            # (space)pc(space).
            r"(?P<offset>[0-9a-f]" + self.width + r")[ \t]+"  # Offset (hex, no 0x prefix).
            r"(?P<dso>[^\r\n \t]*)"                      # Library name.
            r"(?P<symbolpresent> \((?P<symbol>.*)\))?")  # Is the symbol there?

        # Sanitizer output. This is different from debuggerd output, and it is
        # easier to handle this as its own regex. Example:
        #   08-19 05:29:26.283 397 403 I : #0 0xb6a15237 (/system/lib/libclang_rt.asan-arm-android.so+0x4f237)
        self.sanitizer_trace_line = re.compile(
            r".*"                        # Random start stuff.
            r"\#(?P<frame>[0-9]+)"       # Frame number.
            r"[ \t]+0x[0-9a-f]+[ \t]+"   # PC, not interesting to us.
            r"\("                        # Opening paren.
            r"(?P<dso>[^+]+)"            # Library name.
            r"\+"                        # '+'
            r"0x(?P<offset>[0-9a-f]+)"   # Offset (hex, with 0x prefix).
            r"\)")                       # Closing paren.

        # Examples of matched value lines include:
        #   bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
        #   bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so (symbol)
        #   03-25 00:51:05.530 I/DEBUG ( 65): bea4170c 8018e4e9 /data/data/com.my.project/lib/libmyproject.so
        # Again, note the spacing differences.
        self.value_line = re.compile(
            r"(.*)([0-9a-f]" + self.width + r")[ \t]+([0-9a-f]" + self.width
            + r")[ \t]+([^\r\n \t]*)( \((.*)\))?")

        # Lines from 'code around' sections of the output will be matched
        # before value lines because otherwise the 'code around' sections will
        # be confused as value lines.
        #
        # Examples include:
        #   801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
        #   03-25 00:51:05.530 I/DEBUG ( 65): 801cf40c ffffc4cc 00b2f2c5 00b2f1c7 00c1e1a8
        hex_word = r"[ \t]*[a-f0-9]" + self.width
        self.code_line = re.compile(r"(.*)" + hex_word * 5 + r"[ \t]*[ \r\n]")

    def CleanLine(self, ln):
        """Return `ln` as text with every non-ASCII character removed.

        AndroidFeedback adds zero width spaces into its crash reports. These
        should be removed or the regular expressions will fail to match.
        Accepts both byte strings and already-decoded text.
        """
        if isinstance(ln, bytes):
            return ln.decode("ascii", "ignore")
        return ln.encode("ascii", "ignore").decode("ascii")

    def PrintTraceLines(self, trace_lines):
        """Print the back trace held in `trace_lines`.

        Args:
          trace_lines: list of (addr, symbol_with_offset, location) tuples.
        """
        # `or [0]` keeps max() from raising when there is nothing to print.
        maxlen = max([len(tl[1]) for tl in trace_lines] or [0])
        print("")
        print("Stack Trace:")
        print(" RELADDR " + self.spacing + "FUNCTION".ljust(maxlen) + " FILE:LINE")
        for (addr, symbol_with_offset, location) in trace_lines:
            print(" %8s %s %s" % (addr, symbol_with_offset.ljust(maxlen), location))

    def PrintValueLines(self, value_lines):
        """Print the stack data values held in `value_lines`.

        Args:
          value_lines: list of (addr, value, symbol_with_offset, location)
            tuples.
        """
        maxlen = max([len(vl[2]) for vl in value_lines] or [0])
        print("")
        print("Stack Data:")
        print(" ADDR " + self.spacing + "VALUE " + "FUNCTION".ljust(maxlen) + " FILE:LINE")
        for (addr, value, symbol_with_offset, location) in value_lines:
            print(" %8s %8s %s %s" % (addr, value, symbol_with_offset.ljust(maxlen), location))

    def PrintOutput(self, trace_lines, value_lines):
        """Print whichever of the two buffers is non-empty."""
        if trace_lines:
            self.PrintTraceLines(trace_lines)
        if value_lines:
            self.PrintValueLines(value_lines)

    def PrintDivider(self):
        """Print the separator shown between two symbolized dumps."""
        print("")
        print("-----------------------------------------------------\n")

    def ConvertTrace(self, lines):
        """Clean and process every line, then flush what is still buffered."""
        for line in lines:
            self.ProcessLine(self.CleanLine(line))
        self.PrintOutput(self.trace_lines, self.value_lines)

    def MatchTraceLine(self, line):
        """Parse `line` as a debuggerd or sanitizer backtrace frame.

        Returns:
          A dict with the keys "frame", "offset", "dso" (all strings),
          "symbol_present" (bool) and "symbol_name" (string or None), or None
          when the line is not a backtrace frame.
        """
        match = self.trace_line.match(line)
        if match:
            return {"frame": match.group("frame"),
                    "offset": match.group("offset"),
                    "dso": match.group("dso"),
                    "symbol_present": bool(match.group("symbolpresent")),
                    "symbol_name": match.group("symbol")}
        match = self.sanitizer_trace_line.match(line)
        if match:
            return {"frame": match.group("frame"),
                    "offset": match.group("offset"),
                    "dso": match.group("dso"),
                    "symbol_present": False,
                    "symbol_name": None}
        return None

    def ProcessLine(self, line):
        """Handle one cleaned line of crash dump text.

        Returns:
          True if the line was recognized as a header, a backtrace frame or a
          stack value line; False otherwise (including 'code around' lines).
        """
        if self._ProcessHeaderLine(line):
            return True

        ret = False
        trace_line_dict = self.MatchTraceLine(line)
        if trace_line_dict is not None:
            ret = True
            self._ProcessTraceLine(trace_line_dict)
        if self.code_line.match(line):
            # Code lines should be ignored. If this were excluded the 'code
            # around' sections would trigger value_line matches.
            return ret
        value_match = self.value_line.match(line)
        if value_match:
            ret = True
            self._ProcessValueLine(value_match)
        return ret

    def _FlushPending(self):
        """Print the buffered trace/value lines plus a divider, then clear them."""
        self.PrintOutput(self.trace_lines, self.value_lines)
        self.PrintDivider()
        self.trace_lines = []
        self.value_lines = []

    def _ProcessHeaderLine(self, line):
        """Echo `line` if it is a header; return whether it was one."""
        abi_header = self.abi_line.search(line)
        # Listed in the order in which matching headers are echoed.
        headers = [
            self.process_info_line.search(line),
            self.signal_line.search(line),
            self.abort_message_line.search(line),
            self.register_line.search(line),
            self.thread_line.search(line),
            self.dalvik_jni_thread_line.search(line),
            self.dalvik_native_thread_line.search(line),
            self.revision_line.search(line),
            abi_header,
        ]
        if not any(headers):
            return False

        # A header starts a new section: emit what was collected so far.
        if self.trace_lines or self.value_lines:
            self._FlushPending()
            self.last_frame = -1
        for header in headers:
            if header:
                print(header.group(1))
        if abi_header:
            # The dump announces its own ABI; switch the regexes over to it.
            symbol.ARCH = abi_header.group(2)
            self.UpdateAbiRegexes()
        return True

    def _FallbackSymbol(self, source_symbol, symbol_present, symbol_name):
        """Return `source_symbol`, or a substitute when it is empty."""
        if source_symbol:
            return source_symbol
        if symbol_present:
            return symbol.CallCppFilt(symbol_name)
        return "<unknown>"

    def _ProcessTraceLine(self, trace_line_dict):
        """Symbolize one backtrace frame and append it to `trace_lines`."""
        # Compare frame numbers numerically: as strings, "10" <= "9" is true
        # and every trace with ten or more frames would be split in two.
        frame = int(trace_line_dict["frame"])
        code_addr = trace_line_dict["offset"]
        area = trace_line_dict["dso"]
        symbol_present = trace_line_dict["symbol_present"]
        symbol_name = trace_line_dict["symbol_name"]

        # A frame number that does not increase means a new backtrace began.
        if frame <= self.last_frame and (self.trace_lines or self.value_lines):
            self._FlushPending()
        self.last_frame = frame

        if area in self._UNSYMBOLIZED_AREAS:
            self.trace_lines.append((code_addr, "", area))
            return

        # If a calls b which further calls c and c is inlined to b, we want to
        # display "a -> b -> c" in the stack trace instead of just "a -> c".
        info = symbol.SymbolInformation(area, code_addr)
        nest_count = len(info) - 1
        for (source_symbol, source_location, object_symbol_with_offset) in info:
            source_symbol = self._FallbackSymbol(
                source_symbol, symbol_present, symbol_name)
            if not source_location:
                source_location = area
            if nest_count > 0:
                # Every entry but the last is shown with an arrow in place of
                # the address.
                nest_count = nest_count - 1
                if symbol.ARCH in self._64BIT_ARCHES:
                    arrow = "v-------------->"
                else:
                    arrow = "v------>"
                self.trace_lines.append((arrow, source_symbol, source_location))
            else:
                if not object_symbol_with_offset:
                    object_symbol_with_offset = source_symbol
                self.trace_lines.append((code_addr,
                                         object_symbol_with_offset,
                                         source_location))

    def _ProcessValueLine(self, match):
        """Symbolize one stack data line (a `value_line` match object)."""
        (unused_, addr, value, area, symbol_present, symbol_name) = match.groups()
        if area in self._UNSYMBOLIZED_AREAS or not area:
            self.value_lines.append((addr, value, "", area))
            return

        info = symbol.SymbolInformation(area, value)
        # Only the last entry returned for the value is reported.
        (source_symbol, source_location, object_symbol_with_offset) = info.pop()
        source_symbol = self._FallbackSymbol(
            source_symbol, symbol_present, symbol_name)
        if not source_location:
            source_location = area
        if not object_symbol_with_offset:
            object_symbol_with_offset = source_symbol
        self.value_lines.append((addr,
                                 value,
                                 object_symbol_with_offset,
                                 source_location))
Elliott Hughesa9e34172014-07-01 14:56:22 -0700298
299
class RegisterPatternTests(unittest.TestCase):
    """Checks TraceConverter.register_line against the example crash dumps."""

    def assert_register_matches(self, abi, example_crash, stupid_pattern):
        """Assert that register_line agrees with a simple reference pattern.

        Every line of `example_crash` is fed through a new TraceConverter.
        For each line, `register_line` must match exactly when
        `stupid_pattern` is found in that line.

        Args:
          abi: ABI name of the example; currently unused, because the converter
            is driven only by what ProcessLine() sees in the dump itself.
          example_crash: the crash dump text, lines separated by '\\n'.
          stupid_pattern: regex that is found exactly in the register lines.
        """
        tc = TraceConverter()
        for line in example_crash.split('\n'):
            tc.ProcessLine(line)
            is_register = (re.search(stupid_pattern, line) is not None)
            matched = (tc.register_line.search(line) is not None)
            # assertEqual rather than the deprecated assertEquals alias.
            self.assertEqual(matched, is_register, line)
        # Flush whatever is still buffered so the printing code runs as well.
        tc.PrintOutput(tc.trace_lines, tc.value_lines)

    def test_arm_registers(self):
        self.assert_register_matches("arm", example_crashes.arm, r'\b(r0|r4|r8|ip)\b')

    def test_arm64_registers(self):
        self.assert_register_matches("arm64", example_crashes.arm64, r'\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\b')

    def test_mips_registers(self):
        self.assert_register_matches("mips", example_crashes.mips, r'\b(zr|a0|t0|t4|s0|s4|t8|gp|hi)\b')

    def test_mips64_registers(self):
        self.assert_register_matches("mips64", example_crashes.mips64, r'\b(zr|a0|a4|t0|s0|s4|t8|gp|hi)\b')

    def test_x86_registers(self):
        self.assert_register_matches("x86", example_crashes.x86, r'\b(eax|esi|xcs|eip)\b')

    def test_x86_64_registers(self):
        self.assert_register_matches("x86_64", example_crashes.x86_64, r'\b(rax|rsi|r8|r12|cs|rip)\b')
Elliott Hughesa9e34172014-07-01 14:56:22 -0700327
328
# Executing this module as a script runs its unit tests.
if __name__ == "__main__":
    unittest.main()