server/site_crashcollect.py - platform/external/autotest - Gitiles

 # Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import logging
 import os
 import re
 import shutil
 from autotest_lib.client.common_lib import utils as client_utils
 from autotest_lib.client.common_lib.cros import dev_server
 from autotest_lib.client.common_lib.cros import retry
 from autotest_lib.client.common_lib.cros.graphite import autotest_stats
 from autotest_lib.client.cros import constants
 from autotest_lib.server.cros.dynamic_suite.constants import JOB_BUILD_KEY
 from autotest_lib.server import utils


 # Names of the stats counters incremented while symbolicating via devserver.
 # Incremented when no crash server could be picked for a dump.
 CRASH_SERVER_OVERLOAD = 'crash_server_overload'
 # Incremented when a crash server was picked for a dump.
 CRASH_SERVER_FOUND = 'crash_server_found'
 # Incremented when the devserver symbolication call timed out.
 SYMBOLICATE_TIMEDOUT = 'symbolicate_timedout'

 # Timer that decorates symbolicate_minidump_with_devserver().
 timer = autotest_stats.Timer('crash_collect')

 def generate_minidump_stacktrace(minidump_path):
     """
     Symbolicates a minidump locally by running minidump_stackwalk.

     The debug symbols are looked up in lib/debug, three directories above
     the server directory; this is expected to be:
         /build/<board>/usr/lib/debug

     The stack trace is written next to the dump as "<minidump_path>.txt".

     @param minidump_path: absolute path to the minidump to be symbolicated.
     @raise client_utils.error.CmdError if minidump_stackwalk return code != 0.
     """
     debug_symbols = '%s/../../../lib/debug' % utils.get_server_dir()
     logging.info('symbol_dir: %s', debug_symbols)
     # The shell redirection stores the trace alongside the dump itself.
     stackwalk_cmd = 'minidump_stackwalk "%s" "%s" > "%s.txt"' % (
             minidump_path, debug_symbols, minidump_path)
     client_utils.run(stackwalk_cmd)


 @timer.decorate
 def symbolicate_minidump_with_devserver(minidump_path, resultdir):
     """
     Asks a crash devserver to symbolicate a minidump and saves the trace.

     The build that was tested is read from the job keyvals found in
     |resultdir|; its debug symbols are assumed to have been staged on the
     devserver already.  The trace is written to "<minidump_path>.txt".

     @param minidump_path: absolute path to the minidump to be symbolicated.
     @param resultdir: server job's result directory.
     @raise DevServerException upon failure, HTTP or otherwise.
     """
     # Without the build name there is no way to pick the right debug
     # symbols, so bail out before bothering any server.
     job_keyvals = client_utils.read_keyval(resultdir)
     if JOB_BUILD_KEY not in job_keyvals:
         raise dev_server.DevServerException(
             'Cannot determine build being tested.')
     build = job_keyvals[JOB_BUILD_KEY]

     server_name = dev_server.get_least_loaded_devserver(
             devserver_type=dev_server.CrashServer)
     if not server_name:
         autotest_stats.Counter(CRASH_SERVER_OVERLOAD).increment()
         raise dev_server.DevServerException(
                 'No crash server has the capacity to symbolicate the dump.')
     autotest_stats.Counter(CRASH_SERVER_FOUND).increment()

     crash_server = dev_server.CrashServer(server_name)
     trace_text = crash_server.symbolicate_dump(minidump_path, build)
     # An empty answer is treated as a failure; nothing is written.
     if not trace_text:
         raise dev_server.DevServerException('Unknown error!!')
     with open(minidump_path + '.txt', 'w') as trace_file:
         trace_file.write(trace_text)

 def generate_stacktrace_for_file(minidump, host_resultdir):
     """
     Makes a best-effort attempt at symbolicating the dump at |minidump|.

     Local symbolication is tried first; if it fails, the devserver is asked
     instead (bounded by a 600 second timeout).  Failures are only logged,
     never raised for CmdError / DevServerException.

     @param minidump: path to minidump file to generate the stacktrace for.
     @param host_resultdir: server job's result directory.
     """
     # Attempt 1: symbolicate on this machine.
     logging.info('Trying to generate stack trace locally for %s', minidump)
     try:
         generate_minidump_stacktrace(minidump)
     except client_utils.error.CmdError as err:
         logging.info('Failed to generate stack trace locally for '
                      'dump %s (rc=%d):\n%r',
                      minidump, err.result_obj.exit_status, err)
     else:
         logging.info('Generated stack trace for dump %s', minidump)
         return

     # Attempt 2: hand the dump to a crash devserver.
     try:
         logging.info('Generating stack trace using devserver for %s', minidump)
         timed_out, _ = retry.timeout(symbolicate_minidump_with_devserver,
                                      args=(minidump, host_resultdir),
                                      timeout_sec=600)
         if not timed_out:
             logging.info('Generated stack trace for dump %s', minidump)
             return
         logging.info('Generating stack trace timed out for dump %s',
                      minidump)
         autotest_stats.Counter(SYMBOLICATE_TIMEDOUT).increment()
     except dev_server.DevServerException as e:
         logging.info('Failed to generate stack trace on devserver for dump '
                      '%s:\n%r', minidump, e)

     # Neither attempt produced a trace.
     logging.warning('Failed to generate stack trace for %s (see info logs)',
                     minidump)

 def find_and_generate_minidump_stacktraces(host_resultdir):
     """
     Symbolicates every minidump found below a results directory.

     The directory tree rooted at |host_resultdir| is walked recursively.
     Every file whose name ends in .dmp is treated as a minidump and gets a
     symbolication attempt; the trace, when one can be produced, is stored
     in a file named after the dump with .txt appended.

     @param host_resultdir: Directory to walk looking for dmp files.

     @returns The list of all found minidump files. Each dump may or may not have
              been symbolized.
     """
     found_dumps = []
     for dirpath, _, filenames in os.walk(host_resultdir):
         for filename in filenames:
             if filename.endswith('.dmp'):
                 dump_path = os.path.join(dirpath, filename)
                 generate_stacktrace_for_file(dump_path, host_resultdir)
                 found_dumps.append(dump_path)

     return found_dumps


 def fetch_orphaned_crashdumps(host, host_resultdir):
     """
     Pull everything in the host's crash directory into the results folder.

     @param host A host object of the device we're to pull crashes from.
     @param host_resultdir The result directory for this host for this test run.
     @return The list of files that were fetched, as the paths reported by
             host.list_files_glob().
     """
     crash_glob = os.path.join(constants.CRASH_DIR, '*')
     fetched = []
     for crash_file in host.list_files_glob(crash_glob):
         logging.info('Collecting %s...', crash_file)
         host.get_file(crash_file, host_resultdir, preserve_perm=False)
         fetched.append(crash_file)
     return fetched


 def _copy_to_debug_dir(host_resultdir, filename):
     """
     Copies one file into the 'debug' directory below host_resultdir.

     The copy keeps the base name of the source file.  An IOError raised by
     the copy is logged as a warning and otherwise ignored.

     @param host_resultdir The result directory for this host for this test run.
     @param filename The full path of the file to copy to the debug folder.
     """
     destination = os.path.join(host_resultdir, 'debug',
                                os.path.basename(filename))
     try:
         shutil.copyfile(filename, destination)
     except IOError:
         logging.warning('Failed to copy %s to %s', filename, destination)
     else:
         logging.info('Copied %s to %s', filename, destination)


 def get_site_crashdumps(host, test_start_time):
     """
     Copy all of the crashdumps from a host to the results directory.

     Orphaned crash files are pulled from the host into
     <resultdir>/crashinfo.<hostname>, every .dmp file below the job's result
     directory gets a symbolication attempt, all dumps are recorded in the
     job's status log and reported, and the stack trace and log of each
     newly found dump are copied into the debug directory.

     @param host The host object from which to pull crashes
     @param test_start_time When the test we just ran started.  Not used by
             this implementation.
     @return A list of the orphaned crash files followed by all the
             minidumps found below the result directory.
     """
     # NOTE: os.path.join() below raises if the host has no job or the job has
     # no resultdir, because host_resultdir is then None.
     host_resultdir = getattr(getattr(host, 'job', None), 'resultdir', None)
     infodir = os.path.join(host_resultdir, 'crashinfo.%s' % host.hostname)
     if not os.path.exists(infodir):
         os.mkdir(infodir)

     # TODO(milleral): handle orphans differently. crosbug.com/38202
     try:
         orphans = fetch_orphaned_crashdumps(host, infodir)

         # Delete infodir if we have no orphans
         if not orphans:
             logging.info('There are no orphaned crashes; deleting %s', infodir)
             os.rmdir(infodir)

     except Exception as e:
         # Orphan collection is best effort; carry on with the new dumps.
         orphans = []
         logging.warning('Collection of orphaned crash dumps failed %s', e)

     minidumps = find_and_generate_minidump_stacktraces(host_resultdir)

     # Record all crashdumps in status.log of the job:
     # - If one server job runs several client jobs we will only record
     # crashdumps in the status.log of the high level server job.
     # - We will record these crashdumps whether or not we successfully
     # symbolicate them.
     # The parentheses matter: "and" binds tighter than "or", so without them
     # orphans alone would trigger host.job.record() even with no job.
     if host.job and (minidumps or orphans):
         host.job.record('INFO', None, None, 'Start crashcollection record')
         for minidump in minidumps:
             host.job.record('INFO', None, 'New Crash Dump', minidump)
         for orphan in orphans:
             host.job.record('INFO', None, 'Orphaned Crash Dump', orphan)
         host.job.record('INFO', None, None, 'End crashcollection record')

     # From here on |orphans| holds every dump: orphans first, then new ones.
     orphans.extend(minidumps)

     for minidump in orphans:
         report_bug_from_crash(host, minidump)

     # We copy Chrome crash information to the debug dir to assist debugging.
     # Since orphans occurred on a previous run, they are most likely not
     # relevant to the current failure, so we don't copy them.
     for minidump in minidumps:
         minidump_no_ext = os.path.splitext(minidump)[0]
         _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.dmp.txt')
         _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.log')

     return orphans


 def find_package_of(host, exec_name):
     """
     Determine which installed package provides an executable.

     @param host A host object that has the executable.
     @param exec_name Name of or path to executable.
     @return The name of the package that installed the executable, or ''
             when zero or several candidate packages are found.
     """
     # "portageq owners" is run on "host" to list the packages owning
     # "exec_name".  It prints each package name followed by the paths it
     # owns, the paths being prefixed with a tab.  Owners of "python":
     #
     # sys-devel/gdb-7.7.1-r2
     #         /usr/share/gdb/python
     # chromeos-base/dev-install-0.0.1-r711
     #         /usr/bin/python
     # dev-lang/python-2.7.3-r7
     #         /etc/env.d/python
     #
     # sed wraps the package names in "@@@" markers and strips the tabs, then
     # every line is fed to "xargs stat" so that paths get annotated with
     # their mode.  Package lines (and paths missing on the host) just make
     # stat complain, which is harmless because stderr is merged in:
     #
     # stat: cannot stat '@@@ sys-devel/gdb-7.7.1-r2 @@@': ...
     # stat: cannot stat '/usr/share/gdb/python': ...
     # stat: cannot stat '@@@ chromeos-base/dev-install-0.0.1-r711 @@@': ...
     # 755 -rwxr-xr-x /usr/bin/python
     # stat: cannot stat '@@@ dev-lang/python-2.7.3-r7 @@@': ...
     # 755 drwxr-xr-x /etc/env.d/python
     #
     # Lines starting with an octal mode are paths that exist on "host".
     # Only packages owning an executable regular file are kept, which in
     # the example leaves "chromeos-base/dev-install-0.0.1-r711".
     #
     # TODO(milleral): portageq can show scary looking error messages
     # in the debug logs via stderr. We only look at stdout, so those
     # get filtered, but it would be good to silence them.
     owners_cmd = ('portageq owners / ' + exec_name +
             r'| sed -e "s/^[^\t].*/@@@ & @@@/" -e "s/^\t//"'
             r'| tr \\n \\0'
             ' | xargs -0 -r stat -L -c "%a %A %n" 2>&1')
     portageq = host.run(owners_cmd, ignore_status=True)

     pkg_re = re.compile('@@@ (.*) @@@')
     path_re = re.compile('^([0-7]{3,}) (.)')

     # Walk the output, remembering the package each path line belongs to.
     owners = set()
     current_pkg = ''
     for line in portageq.stdout.splitlines():
         pkg_match = pkg_re.search(line)
         if pkg_match:
             current_pkg = pkg_match.group(1)
             continue
         stat_match = path_re.match(line)
         if not stat_match or not current_pkg:
             continue
         mode, file_type = stat_match.groups()
         # '-' as the first permission character means a regular file; any
         # of the three execute bits makes it executable.
         if file_type == '-' and int(mode, 8) & 0o111:
             owners.add(current_pkg)

     # A single candidate must be the package we are after.
     if len(owners) == 1:
         return owners.pop()

     # TODO(milleral): Decide if it really is an error if not exactly one
     # package is found.
     # It is highly questionable as to if this should be left in the
     # production version of this code or not.
     if owners:
         logging.warning('find_package_of() found multiple packages for "%s": '
                         '%s', exec_name, ', '.join(owners))
     else:
         logging.warning('find_package_of() found no packages for "%s"',
                         exec_name)
     return ''


 def report_bug_from_crash(host, minidump_path):
     """
     Look up the package behind a crash and log that it would be reported.

     The executable name is read from the "exec_name=" line of the .meta
     file that sits next to the dump (same path, extension replaced).  Any
     exception is logged as a warning and swallowed.

     @param host A host object that is where the dump came from
     @param minidump_path The path to the dump file that should be reported.
     """
     # TODO(milleral): Once this has actually been tested, remove the
     # try/except. In the meantime, let's make sure nothing dies because of
     # the fact that this code isn't very heavily tested.
     try:
         meta_path = os.path.splitext(minidump_path)[0] + '.meta'
         with open(meta_path, 'r') as meta_file:
             for line in meta_file:
                 fields = line.split('=')
                 if fields[0] != 'exec_name':
                     continue
                 package = find_package_of(host, fields[1].strip())
                 logging.info('Would report crash on %s.',
                              package or '<unknown package>')
                 # Only the first exec_name entry is considered.
                 break
     except Exception as e:
         logging.warning('Crash detection failed with: %s', e)
	# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import logging
	import os
	import re
	import shutil
	from autotest_lib.client.common_lib import utils as client_utils
	from autotest_lib.client.common_lib.cros import dev_server
	from autotest_lib.client.common_lib.cros import retry
	from autotest_lib.client.common_lib.cros.graphite import autotest_stats
	from autotest_lib.client.cros import constants
	from autotest_lib.server.cros.dynamic_suite.constants import JOB_BUILD_KEY
	from autotest_lib.server import utils


	# Names of the stats counters incremented while symbolicating via devserver.
	# Incremented when no crash server could be picked for a dump.
	CRASH_SERVER_OVERLOAD = 'crash_server_overload'
	# Incremented when a crash server was picked for a dump.
	CRASH_SERVER_FOUND = 'crash_server_found'
	# Incremented when the devserver symbolication call timed out.
	SYMBOLICATE_TIMEDOUT = 'symbolicate_timedout'

	# Timer that decorates symbolicate_minidump_with_devserver().
	timer = autotest_stats.Timer('crash_collect')

	def generate_minidump_stacktrace(minidump_path):
	    """
	    Symbolicates a minidump locally by running minidump_stackwalk.

	    The debug symbols are looked up in lib/debug, three directories above
	    the server directory; this is expected to be:
	        /build/<board>/usr/lib/debug

	    The stack trace is written next to the dump as "<minidump_path>.txt".

	    @param minidump_path: absolute path to the minidump to be symbolicated.
	    @raise client_utils.error.CmdError if minidump_stackwalk return code != 0.
	    """
	    symbol_dir = '%s/../../../lib/debug' % utils.get_server_dir()
	    logging.info('symbol_dir: %s', symbol_dir)
	    # The shell redirection stores the trace alongside the dump itself.
	    client_utils.run('minidump_stackwalk "%s" "%s" > "%s.txt"' %
	                     (minidump_path, symbol_dir, minidump_path))


	@timer.decorate
	def symbolicate_minidump_with_devserver(minidump_path, resultdir):
	    """
	    Asks a crash devserver to symbolicate a minidump and saves the trace.

	    The build that was tested is read from the job keyvals found in
	    |resultdir|; its debug symbols are assumed to have been staged on the
	    devserver already.  The trace is written to "<minidump_path>.txt".

	    @param minidump_path: absolute path to the minidump to be symbolicated.
	    @param resultdir: server job's result directory.
	    @raise DevServerException upon failure, HTTP or otherwise.
	    """
	    # First, look up what build we tested.  If we can't find this, we can't
	    # get the right debug symbols, so we might as well give up right now.
	    keyvals = client_utils.read_keyval(resultdir)
	    if JOB_BUILD_KEY not in keyvals:
	        raise dev_server.DevServerException(
	            'Cannot determine build being tested.')

	    crashserver_name = dev_server.get_least_loaded_devserver(
	            devserver_type=dev_server.CrashServer)
	    if not crashserver_name:
	        autotest_stats.Counter(CRASH_SERVER_OVERLOAD).increment()
	        raise dev_server.DevServerException(
	                'No crash server has the capacity to symbolicate the dump.')
	    autotest_stats.Counter(CRASH_SERVER_FOUND).increment()

	    devserver = dev_server.CrashServer(crashserver_name)
	    trace_text = devserver.symbolicate_dump(
	        minidump_path, keyvals[JOB_BUILD_KEY])
	    # An empty answer is treated as a failure; nothing is written.
	    if not trace_text:
	        raise dev_server.DevServerException('Unknown error!!')
	    with open(minidump_path + '.txt', 'w') as trace_file:
	        trace_file.write(trace_text)

	def generate_stacktrace_for_file(minidump, host_resultdir):
	    """
	    Tries to generate a stack trace for the file located at |minidump|.

	    Local symbolication is tried first; if it fails, the devserver is asked
	    instead (bounded by a 600 second timeout).  Failures are only logged,
	    never raised for CmdError / DevServerException.

	    @param minidump: path to minidump file to generate the stacktrace for.
	    @param host_resultdir: server job's result directory.
	    """
	    # First, try to symbolicate locally.
	    try:
	        logging.info('Trying to generate stack trace locally for %s', minidump)
	        generate_minidump_stacktrace(minidump)
	        logging.info('Generated stack trace for dump %s', minidump)
	        return
	    except client_utils.error.CmdError as err:
	        logging.info('Failed to generate stack trace locally for '
	                     'dump %s (rc=%d):\n%r',
	                     minidump, err.result_obj.exit_status, err)

	    # If that did not succeed, try to symbolicate using the dev server.
	    try:
	        logging.info('Generating stack trace using devserver for %s', minidump)
	        is_timeout, _ = retry.timeout(symbolicate_minidump_with_devserver,
	                                      args=(minidump, host_resultdir),
	                                      timeout_sec=600)
	        if is_timeout:
	            logging.info('Generating stack trace timed out for dump %s',
	                         minidump)
	            autotest_stats.Counter(SYMBOLICATE_TIMEDOUT).increment()
	        else:
	            logging.info('Generated stack trace for dump %s', minidump)
	            return
	    except dev_server.DevServerException as e:
	        logging.info('Failed to generate stack trace on devserver for dump '
	                     '%s:\n%r', minidump, e)

	    # Symbolicating failed.
	    logging.warning('Failed to generate stack trace for %s (see info logs)',
	                    minidump)

	def find_and_generate_minidump_stacktraces(host_resultdir):
	    """
	    Finds all minidump files and generates a stack trace for each.

	    Enumerates all files under the test results directory (recursively)
	    and generates a stack trace file for the minidumps.  Minidump files are
	    identified as files with .dmp extension.  The stack trace filename is
	    composed by appending the .txt extension to the minidump filename.

	    @param host_resultdir: Directory to walk looking for dmp files.

	    @returns The list of all found minidump files. Each dump may or may not have
	             been symbolized.
	    """
	    minidumps = []
	    # Local names avoid shadowing the dir() and file builtins.
	    for dirpath, _, filenames in os.walk(host_resultdir):
	        for filename in filenames:
	            if not filename.endswith('.dmp'):
	                continue
	            minidump = os.path.join(dirpath, filename)
	            generate_stacktrace_for_file(minidump, host_resultdir)
	            minidumps.append(minidump)

	    return minidumps


	def fetch_orphaned_crashdumps(host, host_resultdir):
	    """
	    Copy all of the crashes in the crash directory over to the results folder.

	    @param host A host object of the device we're to pull crashes from.
	    @param host_resultdir The result directory for this host for this test run.
	    @return The list of files that were fetched, as the paths reported by
	            host.list_files_glob().
	    """
	    minidumps = []
	    for crash_file in host.list_files_glob(
	            os.path.join(constants.CRASH_DIR, '*')):
	        logging.info('Collecting %s...', crash_file)
	        host.get_file(crash_file, host_resultdir, preserve_perm=False)
	        minidumps.append(crash_file)
	    return minidumps


	def _copy_to_debug_dir(host_resultdir, filename):
	    """
	    Copies a file to the debug dir under host_resultdir.

	    The copy keeps the base name of the source file.  An IOError raised by
	    the copy is logged as a warning and otherwise ignored.

	    @param host_resultdir The result directory for this host for this test run.
	    @param filename The full path of the file to copy to the debug folder.
	    """
	    debugdir = os.path.join(host_resultdir, 'debug')
	    src = filename
	    dst = os.path.join(debugdir, os.path.basename(filename))

	    try:
	        shutil.copyfile(src, dst)
	        logging.info('Copied %s to %s', src, dst)
	    except IOError:
	        logging.warning('Failed to copy %s to %s', src, dst)


	def get_site_crashdumps(host, test_start_time):
	    """
	    Copy all of the crashdumps from a host to the results directory.

	    Orphaned crash files are pulled from the host into
	    <resultdir>/crashinfo.<hostname>, every .dmp file below the job's result
	    directory gets a symbolication attempt, all dumps are recorded in the
	    job's status log and reported, and the stack trace and log of each
	    newly found dump are copied into the debug directory.

	    @param host The host object from which to pull crashes
	    @param test_start_time When the test we just ran started.  Not used by
	            this implementation.
	    @return A list of the orphaned crash files followed by all the
	            minidumps found below the result directory.
	    """
	    # NOTE: os.path.join() below raises if the host has no job or the job has
	    # no resultdir, because host_resultdir is then None.
	    host_resultdir = getattr(getattr(host, 'job', None), 'resultdir', None)
	    infodir = os.path.join(host_resultdir, 'crashinfo.%s' % host.hostname)
	    if not os.path.exists(infodir):
	        os.mkdir(infodir)

	    # TODO(milleral): handle orphans differently. crosbug.com/38202
	    try:
	        orphans = fetch_orphaned_crashdumps(host, infodir)

	        # Delete infodir if we have no orphans
	        if not orphans:
	            logging.info('There are no orphaned crashes; deleting %s', infodir)
	            os.rmdir(infodir)

	    except Exception as e:
	        # Orphan collection is best effort; carry on with the new dumps.
	        orphans = []
	        logging.warning('Collection of orphaned crash dumps failed %s', e)

	    minidumps = find_and_generate_minidump_stacktraces(host_resultdir)

	    # Record all crashdumps in status.log of the job:
	    # - If one server job runs several client jobs we will only record
	    # crashdumps in the status.log of the high level server job.
	    # - We will record these crashdumps whether or not we successfully
	    # symbolicate them.
	    # The parentheses matter: "and" binds tighter than "or", so without them
	    # orphans alone would trigger host.job.record() even with no job.
	    if host.job and (minidumps or orphans):
	        host.job.record('INFO', None, None, 'Start crashcollection record')
	        for minidump in minidumps:
	            host.job.record('INFO', None, 'New Crash Dump', minidump)
	        for orphan in orphans:
	            host.job.record('INFO', None, 'Orphaned Crash Dump', orphan)
	        host.job.record('INFO', None, None, 'End crashcollection record')

	    # From here on |orphans| holds every dump: orphans first, then new ones.
	    orphans.extend(minidumps)

	    for minidump in orphans:
	        report_bug_from_crash(host, minidump)

	    # We copy Chrome crash information to the debug dir to assist debugging.
	    # Since orphans occurred on a previous run, they are most likely not
	    # relevant to the current failure, so we don't copy them.
	    for minidump in minidumps:
	        minidump_no_ext = os.path.splitext(minidump)[0]
	        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.dmp.txt')
	        _copy_to_debug_dir(host_resultdir, minidump_no_ext + '.log')

	    return orphans


	def find_package_of(host, exec_name):
	    """
	    Find the package that an executable came from.

	    @param host A host object that has the executable.
	    @param exec_name Name of or path to executable.
	    @return The name of the package that installed the executable, or ''
	            when zero or several candidate packages are found.
	    """
	    # Run "portageq owners" on "host" to determine which package owns
	    # "exec_name."  Portageq queue output consists of package names followed
	    # tab-prefixed path names.  For example, owners of "python:"
	    #
	    # sys-devel/gdb-7.7.1-r2
	    #         /usr/share/gdb/python
	    # chromeos-base/dev-install-0.0.1-r711
	    #         /usr/bin/python
	    # dev-lang/python-2.7.3-r7
	    #         /etc/env.d/python
	    #
	    # This gets piped into "xargs stat" to annotate each line with
	    # information about the path, so we later can consider only packages
	    # with executable files.  After annotation the above looks like:
	    #
	    # stat: cannot stat '@@@ sys-devel/gdb-7.7.1-r2 @@@': ...
	    # stat: cannot stat '/usr/share/gdb/python': ...
	    # stat: cannot stat '@@@ chromeos-base/dev-install-0.0.1-r711 @@@': ...
	    # 755 -rwxr-xr-x /usr/bin/python
	    # stat: cannot stat '@@@ dev-lang/python-2.7.3-r7 @@@': ...
	    # 755 drwxr-xr-x /etc/env.d/python
	    #
	    # Package names are surrounded by "@@@" to facilitate parsing.  Lines
	    # starting with an octal number were successfully annotated, because
	    # the path existed on "host."
	    # The above is then parsed to find packages which contain executable files
	    # (not directories), in this case "chromeos-base/dev-install-0.0.1-r711."
	    #
	    # TODO(milleral): portageq can show scary looking error messages
	    # in the debug logs via stderr. We only look at stdout, so those
	    # get filtered, but it would be good to silence them.
	    #
	    # The "|" characters must stay unescaped: a backslash-escaped pipe
	    # would reach portageq as a literal argument instead of chaining the
	    # commands.
	    cmd = ('portageq owners / ' + exec_name +
	            r'| sed -e "s/^[^\t].*/@@@ & @@@/" -e "s/^\t//"'
	            r'| tr \\n \\0'
	            ' | xargs -0 -r stat -L -c "%a %A %n" 2>&1')
	    portageq = host.run(cmd, ignore_status=True)

	    # Parse into a set of names of packages containing an executable file.
	    packages = set()
	    pkg = ''
	    pkg_re = re.compile('@@@ (.*) @@@')
	    path_re = re.compile('^([0-7]{3,}) (.)')
	    for line in portageq.stdout.splitlines():
	        match = pkg_re.search(line)
	        if match:
	            pkg = match.group(1)
	            continue
	        match = path_re.match(line)
	        if match:
	            # Any of the three execute bits counts; '-' as the first
	            # permission character means a regular file.
	            isexec = int(match.group(1), 8) & 0o111
	            isfile = match.group(2) == '-'
	            if pkg and isexec and isfile:
	                packages.add(pkg)

	    # If exactly one package found it must be the one we want, return it.
	    if len(packages) == 1:
	        return packages.pop()

	    # TODO(milleral): Decide if it really is an error if not exactly one
	    # package is found.
	    # It is highly questionable as to if this should be left in the
	    # production version of this code or not.
	    if not packages:
	        logging.warning('find_package_of() found no packages for "%s"',
	                        exec_name)
	    else:
	        logging.warning('find_package_of() found multiple packages for "%s": '
	                        '%s', exec_name, ', '.join(packages))
	    return ''


	def report_bug_from_crash(host, minidump_path):
	    """
	    Given a host to query and a minidump, file a bug about the crash.

	    The executable name is read from the "exec_name=" line of the .meta
	    file that sits next to the dump (same path, extension replaced).  For
	    now the package is only logged.  Any exception is logged as a warning
	    and swallowed.

	    @param host A host object that is where the dump came from
	    @param minidump_path The path to the dump file that should be reported.
	    """
	    # TODO(milleral): Once this has actually been tested, remove the
	    # try/except. In the meantime, let's make sure nothing dies because of
	    # the fact that this code isn't very heavily tested.
	    try:
	        meta_path = os.path.splitext(minidump_path)[0] + '.meta'
	        with open(meta_path, 'r') as f:
	            for line in f:
	                parts = line.split('=')
	                if parts[0] == 'exec_name':
	                    package = find_package_of(host, parts[1].strip())
	                    if not package:
	                        package = '<unknown package>'
	                    logging.info('Would report crash on %s.', package)
	                    # Only the first exec_name entry is considered.
	                    break
	    except Exception as e:
	        logging.warning('Crash detection failed with: %s', e)