blob: b4ac6a0e8fd38e168f577c4d1e58d8922241eaaa [file] [log] [blame]
Thieu Lec16253b2011-03-03 11:13:54 -08001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import logging
6import os
Frank Henigman835cf1a2014-09-22 11:36:25 -04007import re
Jacob Dufault2ee76b42016-03-23 15:57:27 -07008import shutil
Thieu Lec16253b2011-03-03 11:13:54 -08009from autotest_lib.client.common_lib import utils as client_utils
Chris Masonebafbbb02012-05-16 13:41:36 -070010from autotest_lib.client.common_lib.cros import dev_server
Dan Shi5af4dd92015-09-16 16:08:30 -070011from autotest_lib.client.common_lib.cros import retry
Hsinyu Chaoe0b08e62015-08-11 10:50:37 +000012from autotest_lib.client.cros import constants
Chris Masone44e4d6c2012-08-15 14:25:53 -070013from autotest_lib.server.cros.dynamic_suite.constants import JOB_BUILD_KEY
Allen Li5889a8a2016-08-17 11:54:09 -070014from autotest_lib.server.crashcollect import collect_log_file
Thieu Lec16253b2011-03-03 11:13:54 -080015from autotest_lib.server import utils
16
Dan Shi5e2efb72017-02-07 11:40:23 -080017try:
18 from chromite.lib import metrics
19except ImportError:
20 metrics = client_utils.metrics_mock
Chris Sosaaccb5ce2012-08-30 17:29:15 -070021
Dan Shi701ec822015-08-24 12:03:25 -070022
def generate_minidump_stacktrace(minidump_path):
    """
    Symbolicates the specified minidump locally with minidump_stackwalk.

    The tool's standard output is redirected by the shell into a file named
    after the minidump with '.txt' appended.

    Debug symbols are looked up in the 'lib/debug' directory three levels
    above the autotest server directory, which is expected to be:
      /build/<board>/usr/lib/debug

    @param minidump_path: absolute path to minidump to be symbolicated.
    @raise client_utils.error.CmdError if minidump_stackwalk return code != 0.
    """
    server_dir = utils.get_server_dir()
    symbol_dir = '%s/../../../lib/debug' % server_dir
    logging.info('symbol_dir: %s', symbol_dir)

    stackwalk_cmd = 'minidump_stackwalk "%s" "%s" > "%s.txt"' % (
            minidump_path, symbol_dir, minidump_path)
    client_utils.run(stackwalk_cmd)
37
38
def _resolve_crashserver():
    """
    Picks the least loaded crash server for symbolicating a crashdump.

    A metrics counter is incremented for either outcome: 'resolved' (tagged
    with the chosen server) or 'could_not_resolve'.

    @raises DevServerException if no server with capacity could be found.
    @returns Hostname of resolved server, if found.
    """
    resolved_name = dev_server.get_least_loaded_devserver(
            devserver_type=dev_server.CrashServer)

    if resolved_name:
        metrics.Counter('chromeos/autotest/crashcollect/resolved'
                        ).increment(fields={'crash_server': resolved_name})
        return resolved_name

    # Nothing usable came back: count the miss and report it to the caller.
    metrics.Counter('chromeos/autotest/crashcollect/could_not_resolve'
                    ).increment()
    raise dev_server.DevServerException(
            'No crash server has the capacity to symbolicate the dump.')
58
59
def _symbolicate_minidump_with_devserver(minidump_path, resultdir,
                                         crashserver_name):
    """
    Asks a crash server to symbolicate a minidump and stores the stack trace.

    The returned trace text is written to a file named after the minidump
    with '.txt' appended. The remote call is timed with a metrics timer
    tagged with the crash server name.

    This function assumes the debug symbols have been staged on the devserver.

    @param minidump_path: absolute path to minidump to be symbolicated.
    @param resultdir: server job's result directory.
    @param crashserver_name: Name of crashserver to attempt to symbolicate with.
    @raise DevServerException upon failure, HTTP or otherwise.
    """
    # The build under test determines which debug symbols apply. If the job
    # keyvals do not record it there is no point in contacting the server.
    job_keyvals = client_utils.read_keyval(resultdir)
    if JOB_BUILD_KEY not in job_keyvals:
        raise dev_server.DevServerException(
                'Cannot determine build being tested.')
    build = job_keyvals[JOB_BUILD_KEY]

    crash_server = dev_server.CrashServer(crashserver_name)
    timer_fields = {'crash_server': crashserver_name}
    with metrics.SecondsTimer(
            'chromeos/autotest/crashcollect/symbolicate_duration',
            fields=timer_fields):
        stack_trace = crash_server.symbolicate_dump(minidump_path, build)

    # An empty response is treated as a failure.
    if not stack_trace:
        raise dev_server.DevServerException('Unknown error!!')

    trace_path = minidump_path + '.txt'
    with open(trace_path, 'w') as trace_file:
        trace_file.write(stack_trace)
90 trace_file.write(trace_text)
Thieu Lec16253b2011-03-03 11:13:54 -080091
def generate_stacktrace_for_file(minidump, host_resultdir):
    """
    Tries to generate a stack trace for the file located at |minidump|.

    Local symbolication is attempted first; if that fails with a CmdError a
    crash server is used instead, bounded by a 600 second timeout. CmdError
    and DevServerException failures are logged, not raised.

    @param minidump: path to minidump file to generate the stacktrace for.
    @param host_resultdir: server job's result directory.
    """
    # Attempt 1: symbolicate locally.
    logging.info('Trying to generate stack trace locally for %s', minidump)
    try:
        generate_minidump_stacktrace(minidump)
    except client_utils.error.CmdError as err:
        logging.info('Failed to generate stack trace locally for '
                     'dump %s (rc=%d):\n%r',
                     minidump, err.result_obj.exit_status, err)
    else:
        logging.info('Generated stack trace for dump %s', minidump)
        return

    # Attempt 2: hand the dump to a crash server.
    try:
        logging.info('Generating stack trace using devserver for %s', minidump)
        crashserver_name = _resolve_crashserver()
        timed_out, _ = retry.timeout(
                _symbolicate_minidump_with_devserver,
                args=(minidump, host_resultdir, crashserver_name),
                timeout_sec=600)
        if not timed_out:
            logging.info('Generated stack trace for dump %s', minidump)
            return

        logging.info('Generating stack trace timed out for dump %s',
                     minidump)
        metrics.Counter(
                'chromeos/autotest/crashcollect/symbolicate_timed_out'
                ).increment(fields={'crash_server': crashserver_name})
    except dev_server.DevServerException as e:
        logging.info('Failed to generate stack trace on devserver for dump '
                     '%s:\n%r', minidump, e)

    # Neither attempt produced a trace.
    logging.warning('Failed to generate stack trace for %s (see info logs)',
                    minidump)
Thieu Lec16253b2011-03-03 11:13:54 -0800133
def find_and_generate_minidump_stacktraces(host_resultdir):
    """
    Symbolicates every minidump found under the results directory.

    The directory is walked recursively; files with the .dmp extension are
    treated as minidumps. For each one a stack trace is attempted, and on
    success it is stored next to the dump under the dump's filename with
    .txt appended.

    @param host_resultdir: Directory to walk looking for dmp files.

    @returns The list of all found minidump files. Each dump may or may not have
             been symbolized.
    """
    found_dumps = []
    # Symbolication is interleaved with the (lazy) directory walk.
    for dump_path in _find_crashdumps(host_resultdir):
        generate_stacktrace_for_file(dump_path, host_resultdir)
        found_dumps.append(dump_path)
    return found_dumps
Thieu Lec16253b2011-03-03 11:13:54 -0800153
154
Allen Liab020912016-09-19 18:07:41 -0700155def _find_crashdumps(host_resultdir):
156 """Find crashdumps.
157
158 @param host_resultdir The result directory for this host for this test run.
159 """
160 for dir, subdirs, files in os.walk(host_resultdir):
161 for file in files:
162 if file.endswith('.dmp'):
163 yield os.path.join(dir, file)
164
165
def _find_orphaned_crashdumps(host):
    """Return the paths of all entries in the host's crash directory.

    @param host A host object of the device.
    """
    crash_glob = os.path.join(constants.CRASH_DIR, '*')
    return host.list_files_glob(crash_glob)
172
173
def report_crashdumps(host):
    """Log and record the crashdumps that exist for host, without processing.

    This is run when no tests failed.  We don't process crashdumps in this
    case because of devserver load, but they should still be reported.

    Dumps still on the host are reported first, followed by dumps already
    present in the local result directory. Each one produces a warning log
    line and an INFO record on the host's job.

    @param host A host object of the device we're to pull crashes from.
    """
    # (message format, crashdump paths) pairs, handled in order. The local
    # search is a generator, so the result directory is only walked once the
    # host-side dumps have been reported.
    sources = (
        ('Host crashdump exists: %s', _find_orphaned_crashdumps(host)),
        ('Local crashdump exists: %s',
         _find_crashdumps(_get_host_resultdir(host))),
    )
    for message_format, crashfiles in sources:
        for crashfile in crashfiles:
            logging.warning(message_format, crashfile)
            host.job.record('INFO', None, None,
                            message_format % (crashfile,))
192
193
def fetch_orphaned_crashdumps(host, infodir):
    """
    Copy all of the crashes in the crash directory over to the results folder.

    Collection is best effort: any failure while listing or copying the
    crashdumps is logged and the dumps collected so far are returned. If
    nothing was collected, infodir is removed again (when it is empty).

    @param host A host object of the device we're to pull crashes from.
    @param infodir The directory to fetch crashdumps into. Created if missing.
    @return The list of minidumps that we pulled back from the host. These are
            the paths as reported by the host.
    """
    orphans = []

    # Check reachability before touching the filesystem, so an unreachable
    # host does not leave an empty infodir behind.
    if not host.check_cached_up_status():
        logging.warning('Host %s did not answer to ping, skip fetching '
                        'orphaned crashdumps.', host.hostname)
        return orphans

    if not os.path.exists(infodir):
        os.mkdir(infodir)

    try:
        for crashfile in _find_orphaned_crashdumps(host):
            logging.info('Collecting %s...', crashfile)
            collect_log_file(host, crashfile, infodir, clean=True)
            orphans.append(crashfile)
    except Exception as e:
        logging.warning('Collection of orphaned crash dumps failed %s', e)
    finally:
        # Delete infodir if we have no orphans
        if not orphans:
            logging.info('There are no orphaned crashes; deleting %s', infodir)
            try:
                os.rmdir(infodir)
            except OSError as e:
                # rmdir only removes empty directories; a partially failed
                # collection or pre-existing content must not turn this
                # best-effort cleanup into a crash.
                logging.warning('Could not delete %s: %s', infodir, e)
    return orphans
Chris Masoned931e8c2011-11-09 13:17:16 -0800224
225
Jacob Dufault2ee76b42016-03-23 15:57:27 -0700226def _copy_to_debug_dir(host_resultdir, filename):
227 """
228 Copies a file to the debug dir under host_resultdir.
229
230 @param host_resultdir The result directory for this host for this test run.
231 @param filename The full path of the file to copy to the debug folder.
232 """
233 debugdir = os.path.join(host_resultdir, 'debug')
234 src = filename
235 dst = os.path.join(debugdir, os.path.basename(filename))
236
237 try:
238 shutil.copyfile(src, dst)
239 logging.info('Copied %s to %s', src, dst)
240 except IOError:
241 logging.warning('Failed to copy %s to %s', src, dst)
242
243
Allen Liab020912016-09-19 18:07:41 -0700244def _get_host_resultdir(host):
245 """Get resultdir for host.
246
247 @param host A host object of the device we're to pull crashes from.
248 """
249 return getattr(getattr(host, 'job', None), 'resultdir', None)
250
251
def get_host_infodir(host):
    """Get infodir for host.

    The infodir is the 'crashinfo.<hostname>' directory inside the result
    directory of the host's job.

    @param host A host object of the device we're to pull crashes from.
    """
    resultdir = _get_host_resultdir(host)
    infodir_name = 'crashinfo.%s' % host.hostname
    return os.path.join(resultdir, infodir_name)
259
260
def get_site_crashdumps(host, test_start_time):
    """
    Copy all of the crashdumps from a host to the results directory.

    Orphaned crashdumps are fetched from the host, every minidump under the
    host's result directory gets a stack trace attempt, all dumps are recorded
    in the job's status log, and the stack trace and log files of the
    non-orphaned minidumps are copied into the debug directory.

    @param host The host object from which to pull crashes
    @param test_start_time When the test we just ran started. Not used by
                           this function.
    @return A list of all the minidumps: the orphaned crashdumps followed by
            the minidumps found in the result directory.
    """
    host_resultdir = _get_host_resultdir(host)
    infodir = get_host_infodir(host)

    orphans = fetch_orphaned_crashdumps(host, infodir)
    minidumps = find_and_generate_minidump_stacktraces(host_resultdir)

    # Record all crashdumps in status.log of the job:
    # - If one server job runs several client jobs we will only record
    #   crashdumps in the status.log of the high level server job.
    # - We will record these crashdumps whether or not we successfully
    #   symbolicate them.
    # The parentheses matter: 'and' binds tighter than 'or', so without them
    # a falsy host.job combined with a non-empty orphans list would still
    # enter the branch and fail on host.job.record.
    if host.job and (minidumps or orphans):
        host.job.record('INFO', None, None, 'Start crashcollection record')
        for minidump in minidumps:
            host.job.record('INFO', None, 'New Crash Dump', minidump)
        for orphan in orphans:
            host.job.record('INFO', None, 'Orphaned Crash Dump', orphan)
        host.job.record('INFO', None, None, 'End crashcollection record')

    # From here on 'orphans' holds every dump, orphaned ones first.
    orphans.extend(minidumps)

    for minidump in orphans:
        report_bug_from_crash(host, minidump)

    # We copy Chrome crash information to the debug dir to assist debugging.
    # Since orphans occurred on a previous run, they are most likely not
    # relevant to the current failure, so we don't copy them.
    for minidump in minidumps:
        minidump_no_ext = os.path.splitext(minidump)[0]
        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.dmp.txt')
        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.log')

    return orphans
302
303
def find_package_of(host, exec_name):
    """
    Find the package that an executable came from.

    @param host A host object that has the executable.
    @param exec_name Name of or path to executable. It is shell-quoted before
                     being placed on the command line run on the host.
    @return The name of the package that installed the executable, or an
            empty string if zero or more than one candidate package owns an
            executable file of that name.
    """
    # shlex.quote only exists on Python 3; pipes.quote is the Python 2 name.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    # Run "portageq owners" on "host" to determine which package owns
    # "exec_name."  Portageq output consists of package names followed by
    # tab-prefixed path names.  For example, owners of "python:"
    #
    # sys-devel/gdb-7.7.1-r2
    #         /usr/share/gdb/python
    # chromeos-base/dev-install-0.0.1-r711
    #         /usr/bin/python
    # dev-lang/python-2.7.3-r7
    #         /etc/env.d/python
    #
    # This gets piped into "xargs stat" to annotate each line with
    # information about the path, so we later can consider only packages
    # with executable files.  After annotation the above looks like:
    #
    # stat: cannot stat '@@@ sys-devel/gdb-7.7.1-r2 @@@': ...
    # stat: cannot stat '/usr/share/gdb/python': ...
    # stat: cannot stat '@@@ chromeos-base/dev-install-0.0.1-r711 @@@': ...
    # 755 -rwxr-xr-x /usr/bin/python
    # stat: cannot stat '@@@ dev-lang/python-2.7.3-r7 @@@': ...
    # 755 drwxr-xr-x /etc/env.d/python
    #
    # Package names are surrounded by "@@@" to facilitate parsing.  Lines
    # starting with an octal number were successfully annotated, because
    # the path existed on "host."
    # The above is then parsed to find packages which contain executable files
    # (not directories), in this case "chromeos-base/dev-install-0.0.1-r711."
    #
    # TODO(milleral): portageq can show scary looking error messages
    # in the debug logs via stderr.  We only look at stdout, so those
    # get filtered, but it would be good to silence them.
    #
    # exec_name is read from a crash .meta file, so it is quoted to keep
    # whitespace or shell metacharacters in it from altering the pipeline.
    cmd = ('portageq owners / ' + shell_quote(exec_name) +
           r'| sed -e "s/^[^\t].*/@@@ & @@@/" -e "s/^\t//"'
           r'| tr \\n \\0'
           ' | xargs -0 -r stat -L -c "%a %A %n" 2>&1')
    portageq = host.run(cmd, ignore_status=True)

    # Parse into a set of names of packages containing an executable file.
    packages = set()
    pkg = ''
    pkg_re = re.compile(r'@@@ (.*) @@@')
    path_re = re.compile(r'^([0-7]{3,}) (.)')
    for line in portageq.stdout.splitlines():
        match = pkg_re.search(line)
        if match:
            # A package header: the path lines that follow belong to it.
            pkg = match.group(1)
            continue
        match = path_re.match(line)
        if match:
            # Group 1 is the octal mode, group 2 the file type character of
            # the symbolic mode ('-' for a regular file).
            isexec = int(match.group(1), 8) & 0o111
            isfile = match.group(2) == '-'
            if pkg and isexec and isfile:
                packages.add(pkg)

    # If exactly one package found it must be the one we want, return it.
    if len(packages) == 1:
        return packages.pop()

    # TODO(milleral): Decide if it really is an error if not exactly one
    # package is found.
    # It is highly questionable as to if this should be left in the
    # production version of this code or not.
    if not packages:
        logging.warning('find_package_of() found no packages for "%s"',
                        exec_name)
    else:
        # Sorted so the log line is stable from run to run.
        logging.warning('find_package_of() found multiple packages for "%s": '
                        '%s', exec_name, ', '.join(sorted(packages)))
    return ''
Alex Miller24c27c12012-08-09 10:24:24 -0700381
382
def report_bug_from_crash(host, minidump_path):
    """
    Given a host to query and a minidump, file a bug about the crash.

    Currently this only logs which package a bug would be filed against. The
    executable name is read from the 'exec_name=<value>' line of the '.meta'
    file that shares the minidump's base name. Any failure is logged as a
    warning and not raised.

    @param host A host object that is where the dump came from
    @param minidump_path The path to the dump file that should be reported.
    """
    # TODO(milleral): Once this has actually been tested, remove the
    # try/except.  In the meantime, let's make sure nothing dies because of
    # the fact that this code isn't very heavily tested.
    try:
        meta_path = os.path.splitext(minidump_path)[0] + '.meta'
        with open(meta_path, 'r') as f:
            for line in f:
                # Split on the first '=' only, so a value that itself
                # contains '=' is kept whole; lines without '=' are skipped.
                key, separator, value = line.partition('=')
                if separator and key == 'exec_name':
                    package = find_package_of(host, value.strip())
                    if not package:
                        package = '<unknown package>'
                    logging.info('Would report crash on %s.', package)
                    break
    except Exception as e:
        logging.warning('Crash detection failed with: %s', e)
beepscb6f1e22013-06-28 19:14:10 -0700405 logging.warning('Crash detection failed with: %s', e)