blob: 8a1bceb0bbc5e33b1c966298e4e4a1eb5f5ecf20 [file] [log] [blame]
Thieu Lec16253b2011-03-03 11:13:54 -08001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import os
Frank Henigman835cf1a2014-09-22 11:36:25 -04007import re
Jacob Dufault2ee76b42016-03-23 15:57:27 -07008import shutil
Thieu Lec16253b2011-03-03 11:13:54 -08009from autotest_lib.client.common_lib import utils as client_utils
Chris Masonebafbbb02012-05-16 13:41:36 -070010from autotest_lib.client.common_lib.cros import dev_server
Dan Shi5af4dd92015-09-16 16:08:30 -070011from autotest_lib.client.common_lib.cros import retry
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000012from autotest_lib.client.cros import constants
Chris Masone44e4d6c2012-08-15 14:25:53 -070013from autotest_lib.server.cros.dynamic_suite.constants import JOB_BUILD_KEY
Allen Li5889a8a2016-08-17 11:54:09 -070014from autotest_lib.server.crashcollect import collect_log_file
Thieu Lec16253b2011-03-03 11:13:54 -080015from autotest_lib.server import utils
16
Dan Shi5e2efb72017-02-07 11:40:23 -080017try:
18 from chromite.lib import metrics
19except ImportError:
20 metrics = client_utils.metrics_mock
Chris Sosaaccb5ce2012-08-30 17:29:15 -070021
Dan Shi701ec822015-08-24 12:03:25 -070022
def generate_minidump_stacktrace(minidump_path):
    """
    Symbolicate a minidump locally with minidump_stackwalk.

    The debug symbols are looked up relative to the autotest server
    directory, i.e. under:
      /build/<board>/usr/lib/debug

    The stack trace is written next to the dump, in a file named after the
    dump with '.txt' appended.

    @param minidump_path: absolute path to minidump to be symbolicated.
    @raise client_utils.error.CmdError if minidump_stackwalk return code != 0.
    """
    symbol_dir = '%s/../../../lib/debug' % utils.get_server_dir()
    logging.info('symbol_dir: %s', symbol_dir)
    # The shell redirection sends the walker's stdout into <dump>.txt.
    command = 'minidump_stackwalk "%s" "%s" > "%s.txt"' % (
            minidump_path, symbol_dir, minidump_path)
    client_utils.run(command)
37
38
def _resolve_crashserver():
    """
    Pick the least loaded crash server able to symbolicate a crashdump.

    A metrics counter is incremented for both outcomes (resolved / could not
    resolve).

    @raises DevServerException if no server with capacity could be found.
    @returns Hostname of resolved server, if found.
    """
    server = dev_server.get_least_loaded_devserver(
            devserver_type=dev_server.CrashServer)
    if server:
        resolved = metrics.Counter('chromeos/autotest/crashcollect/resolved')
        resolved.increment(fields={'crash_server': server})
        return server

    # Nothing usable came back: count the failure and report it to the caller.
    unresolved = metrics.Counter(
            'chromeos/autotest/crashcollect/could_not_resolve')
    unresolved.increment()
    raise dev_server.DevServerException(
            'No crash server has the capacity to symbolicate the dump.')
58
59
def _symbolicate_minidump_with_devserver(minidump_path, resultdir,
                                         crashserver_name):
    """
    Ask a crash server to symbolicate a minidump and save the stack trace.

    This function assumes the debug symbols have been staged on the devserver.
    The trace is written to a file named after the dump with '.txt' appended.

    @param minidump_path: absolute path to minidump to be symbolicated.
    @param resultdir: server job's result directory.
    @param crashserver_name: Name of crashserver to attempt to symbolicate with.
    @raise DevServerException upon failure, HTTP or otherwise.
    """
    # The build under test selects the debug symbols; without it there is
    # nothing sensible to ask the crash server for.
    job_keyvals = client_utils.read_keyval(resultdir)
    if JOB_BUILD_KEY not in job_keyvals:
        raise dev_server.DevServerException(
                'Cannot determine build being tested.')
    build = job_keyvals[JOB_BUILD_KEY]

    crash_server = dev_server.CrashServer(crashserver_name)

    # Time only the remote symbolication call, tagged by crash server.
    with metrics.SecondsTimer(
            'chromeos/autotest/crashcollect/symbolicate_duration',
            fields={'crash_server': crashserver_name}):
        stack_trace = crash_server.symbolicate_dump(minidump_path, build)

    if not stack_trace:
        raise dev_server.DevServerException('Unknown error!!')

    trace_path = minidump_path + '.txt'
    with open(trace_path, 'w') as out:
        out.write(stack_trace)
Thieu Lec16253b2011-03-03 11:13:54 -080091
def generate_stacktrace_for_file(minidump, host_resultdir):
    """
    Tries to generate a stack trace for the file located at |minidump|.

    Local symbolication is attempted first; if that fails, a crash server is
    used, bounded by a 600 second timeout. Failures are logged, not raised.

    @param minidump: path to minidump file to generate the stacktrace for.
    @param host_resultdir: server job's result directory.
    """
    # Attempt 1: symbolicate on this machine.
    logging.info('Trying to generate stack trace locally for %s', minidump)
    try:
        generate_minidump_stacktrace(minidump)
    except client_utils.error.CmdError as err:
        logging.info('Failed to generate stack trace locally for '
                     'dump %s (rc=%d):\n%r',
                     minidump, err.result_obj.exit_status, err)
    else:
        logging.info('Generated stack trace for dump %s', minidump)
        return

    # Attempt 2: hand the dump to a crash server.
    logging.info('Generating stack trace using devserver for %s', minidump)
    try:
        crashserver_name = _resolve_crashserver()
        timed_out, _ = retry.timeout(
                _symbolicate_minidump_with_devserver,
                args=(minidump, host_resultdir, crashserver_name),
                timeout_sec=600)
    except dev_server.DevServerException as e:
        logging.info('Failed to generate stack trace on devserver for dump '
                     '%s:\n%r', minidump, e)
    else:
        if not timed_out:
            logging.info('Generated stack trace for dump %s', minidump)
            return
        logging.info('Generating stack trace timed out for dump %s',
                     minidump)
        metrics.Counter(
                'chromeos/autotest/crashcollect/symbolicate_timed_out'
                ).increment(fields={'crash_server': crashserver_name})

    # Neither attempt produced a trace.
    logging.warning('Failed to generate stack trace for %s (see info logs)',
                    minidump)
Thieu Lec16253b2011-03-03 11:13:54 -0800133
def find_and_generate_minidump_stacktraces(host_resultdir):
    """
    Symbolicate every minidump found under the results directory.

    The directory is walked recursively; any file whose name ends in .dmp is
    treated as a minidump. For each one a stack trace is attempted, stored in
    a file named after the dump with .txt appended.

    @param host_resultdir: Directory to walk looking for dmp files.

    @returns The list of all found minidump files. Each dump may or may not have
             been symbolized.
    """
    processed = []
    for dump_path in _find_crashdumps(host_resultdir):
        # Symbolication failures are handled (logged) by the callee, so the
        # dump is recorded regardless of the outcome.
        generate_stacktrace_for_file(dump_path, host_resultdir)
        processed.append(dump_path)
    return processed
Thieu Lec16253b2011-03-03 11:13:54 -0800153
154
def _find_crashdumps(host_resultdir):
    """Lazily yield the path of every '.dmp' file below a directory.

    @param host_resultdir The result directory for this host for this test run.
    """
    for parent, _subdirs, names in os.walk(host_resultdir):
        dump_names = [name for name in names if name.endswith('.dmp')]
        for name in dump_names:
            yield os.path.join(parent, name)
164
165
def _find_orphaned_crashdumps(host):
    """List everything sitting in the crash directory on the host.

    @param host A host object of the device.
    @return Whatever host.list_files_glob() returns for CRASH_DIR/*.
    """
    crash_glob = os.path.join(constants.CRASH_DIR, '*')
    return host.list_files_glob(crash_glob)
172
173
def report_crashdumps(host):
    """Log and record crashdumps for host without processing them.

    This is run when no tests failed.  We don't process crashdumps in this
    case because of devserver load, but they should still be reported.

    Each dump is both logged as a warning and recorded as an INFO entry on
    the host's job.

    @param host A host object of the device we're to pull crashes from.
    """
    # Dumps still sitting on the device.
    for remote_dump in _find_orphaned_crashdumps(host):
        logging.warning('Host crashdump exists: %s', remote_dump)
        host.job.record('INFO', None, None,
                        'Host crashdump exists: %s' % (remote_dump,))

    # Dumps already present in the job's result directory.
    resultdir = _get_host_resultdir(host)
    for local_dump in _find_crashdumps(resultdir):
        logging.warning('Local crashdump exists: %s', local_dump)
        host.job.record('INFO', None, None,
                        'Local crashdump exists: %s' % (local_dump,))
192
193
def fetch_orphaned_crashdumps(host, infodir):
    """
    Copy all of the crashes in the crash directory over to the results folder.

    Collection is best-effort: any exception raised while listing or
    collecting is logged and swallowed, and whatever was collected up to that
    point is still returned.

    If nothing was collected, infodir is removed again. That removal is also
    best-effort, so a pre-existing, non-empty infodir is left in place with a
    warning instead of raising OSError out of this function.

    @param host A host object of the device we're to pull crashes from.
    @param infodir The directory to fetch crashdumps into.
    @return The list of crash file paths (as reported by the host) that were
            collected.
    """
    if not os.path.exists(infodir):
        os.mkdir(infodir)
    orphans = []
    try:
        for crashfile in _find_orphaned_crashdumps(host):
            logging.info('Collecting %s...', crashfile)
            collect_log_file(host, crashfile, infodir, clean=True)
            orphans.append(crashfile)
    except Exception as e:
        logging.warning('Collection of orphaned crash dumps failed %s', e)
    finally:
        # Delete infodir if we have no orphans
        if not orphans:
            logging.info('There are no orphaned crashes; deleting %s', infodir)
            try:
                os.rmdir(infodir)
            except OSError as e:
                # rmdir only removes empty directories; infodir may hold
                # earlier content. Don't let cleanup break collection.
                logging.warning('Could not delete %s: %s', infodir, e)
    return orphans
Chris Masoned931e8c2011-11-09 13:17:16 -0800218
219
def _copy_to_debug_dir(host_resultdir, filename):
    """
    Copy a file into <host_resultdir>/debug, keeping its base name.

    A failed copy (IOError) is logged as a warning and otherwise ignored.

    @param host_resultdir The result directory for this host for this test run.
    @param filename The full path of the file to copy to the debug folder.
    """
    target = os.path.join(host_resultdir, 'debug', os.path.basename(filename))
    try:
        shutil.copyfile(filename, target)
    except IOError:
        logging.warning('Failed to copy %s to %s', filename, target)
    else:
        logging.info('Copied %s to %s', filename, target)
236
237
def _get_host_resultdir(host):
    """Return host.job.resultdir, or None if any link in that chain is absent.

    @param host A host object of the device we're to pull crashes from.
    """
    job = getattr(host, 'job', None)
    return getattr(job, 'resultdir', None)
244
245
def get_host_infodir(host):
    """Build the path of the per-host crashinfo directory.

    The result is <host result dir>/crashinfo.<hostname>.

    @param host A host object of the device we're to pull crashes from.
    """
    resultdir = _get_host_resultdir(host)
    dirname = 'crashinfo.%s' % host.hostname
    return os.path.join(resultdir, dirname)
253
254
def get_site_crashdumps(host, test_start_time):
    """
    Copy all of the crashdumps from a host to the results directory.

    Orphaned crash files are fetched from the host, every minidump found under
    the host's result directory is symbolicated (best-effort), all of them are
    recorded in the job's status log, and the stack trace / log belonging to
    each minidump is copied into the debug directory.

    @param host The host object from which to pull crashes
    @param test_start_time When the test we just ran started. Not used by this
                           function.
    @return A list of all the minidumps: the orphaned crash files followed by
            the minidumps found in the result directory.
    """
    host_resultdir = _get_host_resultdir(host)
    infodir = get_host_infodir(host)

    orphans = fetch_orphaned_crashdumps(host, infodir)
    minidumps = find_and_generate_minidump_stacktraces(host_resultdir)

    # Record all crashdumps in status.log of the job:
    # - If one server job runs several client jobs we will only record
    #   crashdumps in the status.log of the high level server job.
    # - We will record these crashdumps whether or not we successfully
    #   symbolicate them.
    # The parentheses matter: 'and' binds tighter than 'or', so without them
    # orphans alone would let us call host.job.record() on a missing job.
    if host.job and (minidumps or orphans):
        host.job.record('INFO', None, None, 'Start crashcollection record')
        for minidump in minidumps:
            host.job.record('INFO', None, 'New Crash Dump', minidump)
        for orphan in orphans:
            host.job.record('INFO', None, 'Orphaned Crash Dump', orphan)
        host.job.record('INFO', None, None, 'End crashcollection record')

    # From here on 'orphans' holds every dump: orphans first, then minidumps.
    orphans.extend(minidumps)

    for minidump in orphans:
        report_bug_from_crash(host, minidump)

    # We copy Chrome crash information to the debug dir to assist debugging.
    # Since orphans occurred on a previous run, they are most likely not
    # relevant to the current failure, so we don't copy them.
    for minidump in minidumps:
        minidump_no_ext = os.path.splitext(minidump)[0]
        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.dmp.txt')
        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.log')

    return orphans
296
297
def find_package_of(host, exec_name):
    """
    Find the package that an executable came from.

    @param host A host object that has the executable.
    @param exec_name Name of or path to executable.
    @return The name of the package that installed the executable, or '' when
            zero or several candidate packages were found.
    """
    # "portageq owners" is run on "host" to list the packages owning
    # "exec_name". Its output is package names, each followed by
    # tab-prefixed path names, e.g. for "python":
    #
    #   sys-devel/gdb-7.7.1-r2
    #           /usr/share/gdb/python
    #   chromeos-base/dev-install-0.0.1-r711
    #           /usr/bin/python
    #   dev-lang/python-2.7.3-r7
    #           /etc/env.d/python
    #
    # sed wraps package names in "@@@" and strips the tab from paths; the
    # result is fed through "xargs stat" so every existing path is annotated
    # with its mode, while everything else yields a stat error line:
    #
    #   stat: cannot stat '@@@ sys-devel/gdb-7.7.1-r2 @@@': ...
    #   stat: cannot stat '/usr/share/gdb/python': ...
    #   stat: cannot stat '@@@ chromeos-base/dev-install-0.0.1-r711 @@@': ...
    #   755 -rwxr-xr-x /usr/bin/python
    #   stat: cannot stat '@@@ dev-lang/python-2.7.3-r7 @@@': ...
    #   755 drwxr-xr-x /etc/env.d/python
    #
    # Lines starting with an octal number are paths that exist on "host".
    # Only packages owning an executable regular file (not a directory) are
    # kept; above that is "chromeos-base/dev-install-0.0.1-r711".
    #
    # TODO(milleral): portageq can show scary looking error messages
    # in the debug logs via stderr.  We only look at stdout, so those
    # get filtered, but it would be good to silence them.
    pipeline = ('portageq owners / ' + exec_name +
                r'| sed -e "s/^[^\t].*/@@@ & @@@/" -e "s/^\t//"'
                r'| tr \\n \\0'
                ' | xargs -0 -r stat -L -c "%a %A %n" 2>&1')
    result = host.run(pipeline, ignore_status=True)

    # Collect the names of packages that own an executable file.
    package_marker = re.compile('@@@ (.*) @@@')
    stat_line = re.compile('^([0-7]{3,}) (.)')
    owners = set()
    current = ''
    for line in result.stdout.splitlines():
        marker = package_marker.search(line)
        if marker is not None:
            # A new package header; following paths belong to it.
            current = marker.group(1)
            continue
        stat = stat_line.match(line)
        if stat is None or not current:
            continue
        mode, kind = stat.groups()
        # '-' is a regular file; 0o111 covers any of the execute bits.
        if kind == '-' and int(mode, 8) & 0o111:
            owners.add(current)

    # Exactly one candidate: that must be the package we are after.
    if len(owners) == 1:
        return owners.pop()

    # TODO(milleral): Decide if it really is an error if not exactly one
    # package is found.
    # It is highly questionable as to if this should be left in the
    # production version of this code or not.
    if not owners:
        logging.warning('find_package_of() found no packages for "%s"',
                        exec_name)
    else:
        logging.warning('find_package_of() found multiple packages for "%s": '
                        '%s', exec_name, ', '.join(owners))
    return ''
Alex Miller24c27c12012-08-09 10:24:24 -0700375
376
def report_bug_from_crash(host, minidump_path):
    """
    Given a host to query and a minidump, file a bug about the crash.

    Currently this only logs which package the crash would be reported
    against: the dump's sibling '.meta' file is scanned for the first
    'exec_name' entry and the owning package is looked up on the host.

    @param host A host object that is where the dump came from
    @param minidump_path The path to the dump file that should be reported.
    """
    # TODO(milleral): Once this has actually been tested, remove the
    # try/except.  In the meantime, let's make sure nothing dies because of
    # the fact that this code isn't very heavily tested.
    try:
        base, _ext = os.path.splitext(minidump_path)
        with open(base + '.meta', 'r') as meta:
            for entry in meta:
                fields = entry.split('=')
                if fields[0] != 'exec_name':
                    continue
                package = (find_package_of(host, fields[1].strip())
                           or '<unknown package>')
                logging.info('Would report crash on %s.', package)
                # Only the first exec_name entry is considered.
                break
    except Exception as e:
        logging.warning('Crash detection failed with: %s', e)