[autotest] Force collection of /var/log if a test fails with a device error.
When a test running on the DUT is aborted and does not get a chance to run its
post-test hooks, the diff of /var/log cannot be copied to the results folder,
and autoserv is unable to collect any logs from the DUT.
This CL records device-error failures in job.failed_with_device_error.
Autoserv uses this flag to determine whether to collect crash info (through
server/control_segments/crashinfo), which collects all files in /var/log.
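
For illustration, the flag's life cycle looks roughly like this (a simplified
sketch; only failed_with_device_error comes from this CL, while the exception
and method names here are placeholders for the real autoserv plumbing):

    class AutotestDeviceError(Exception):
        """Placeholder for the device-error exception raised by the client run."""
        pass

    class JobSketch(object):
        def __init__(self):
            # server_job.__init__: assume no device failure until one is seen.
            self.failed_with_device_error = False

        def run_client(self, client):
            # server/autotest.py: a device error aborts the client run early,
            # so mark the job before returning.
            try:
                client()
            except AutotestDeviceError:
                self.failed_with_device_error = True

        def needs_crashinfo(self):
            # server/server_job.py: even if the control file finished without
            # raising, collect crash info (all of /var/log) on a device error.
            return self.failed_with_device_error

    def flaky_client():
        raise AutotestDeviceError('DUT rebooted unexpectedly')

    job = JobSketch()
    job.run_client(flaky_client)
    assert job.needs_crashinfo()
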
BUG=chromium:271703
TEST=Run autoserv in a local setup. Manually reboot the DUT in the middle of a
test, then confirm that the collected results contain content in crashinfo.[DUT name].
Change-Id: I1a3757b8933fe60deea75728e867033eeb86c7cd
Reviewed-on: https://gerrit.chromium.org/gerrit/66013
Commit-Queue: Dan Shi <dshi@chromium.org>
Reviewed-by: Dan Shi <dshi@chromium.org>
Tested-by: Dan Shi <dshi@chromium.org>
diff --git a/client/common_lib/base_job_unittest.py b/client/common_lib/base_job_unittest.py
index b29527c..c5eafc5 100755
--- a/client/common_lib/base_job_unittest.py
+++ b/client/common_lib/base_job_unittest.py
@@ -93,6 +93,8 @@
'warning_manager', 'warning_loggers', 'label', 'test_retry',
])
+ OPTIONAL_ATTRIBUTES_DEVICE_ERROR = set(['failed_with_device_error'])
+
def test_public_attributes_initialized(self):
# only the known public attributes should be there after __init__
self.call_init()
@@ -104,7 +106,8 @@
self.assertEqual(missing_attributes, set([]),
'Missing attributes: %s' %
', '.join(sorted(missing_attributes)))
- extra_attributes = public_attributes - expected_attributes
+ extra_attributes = (public_attributes - expected_attributes -
+ self.OPTIONAL_ATTRIBUTES_DEVICE_ERROR)
self.assertEqual(extra_attributes, set([]),
'Extra public attributes found: %s' %
', '.join(sorted(extra_attributes)))
diff --git a/server/autotest.py b/server/autotest.py
index fcc67fc..2199ed1 100644
--- a/server/autotest.py
+++ b/server/autotest.py
@@ -885,6 +885,7 @@
self.host.job.record('FAIL', None, None, str(e))
self.host.job.record('END FAIL', None, None)
self.host.job.record('END GOOD', None, None)
+ self.host.job.failed_with_device_error = True
return
except AutotestAbort as e:
self.host.job.record('ABORT', None, None, str(e))
@@ -944,6 +945,7 @@
try:
self.host.get_file(self.client_results_dir + '/',
self.server_results_dir, preserve_symlinks=True)
+
# Only report time used for successful get_file calls.
timer.stop();
except Exception:
diff --git a/server/crashcollect.py b/server/crashcollect.py
index 38a3b26..a8ac8a5 100644
--- a/server/crashcollect.py
+++ b/server/crashcollect.py
@@ -1,7 +1,9 @@
-import os, time, pickle, logging, shutil
+import os, time, logging, shutil
from autotest_lib.client.common_lib import global_config
+from autotest_lib.client.cros import constants
from autotest_lib.server import utils
+from autotest_lib.site_utils.graphite import stats
# import any site hooks for the crashdump and crashinfo collection
@@ -32,6 +34,11 @@
collect_command(host, "dmesg", os.path.join(crashinfo_dir, "dmesg"))
collect_uncollected_logs(host)
+ # Collect everything in /var/log.
+ log_path = os.path.join(crashinfo_dir, 'var')
+ os.makedirs(log_path)
+ collect_log_file(host, constants.LOG_DIR, log_path)
+
# Load default for number of hours to wait before giving up on crash collection.
HOURS_TO_WAIT = global_config.global_config.get_config_value(
@@ -54,6 +61,7 @@
logging.info("Waiting %s hours for %s to come up (%s)",
hours_to_wait, host.hostname, current_time)
if not host.wait_up(timeout=hours_to_wait * 3600):
+ stats.Counter('collect_crashinfo_timeout').increment()
logging.warning("%s down, unable to collect crash info",
host.hostname)
return False
diff --git a/server/server_job.py b/server/server_job.py
index 8bcf130..d6a2177 100644
--- a/server/server_job.py
+++ b/server/server_job.py
@@ -242,6 +242,10 @@
self, self._indenter, 'status.log', 'status.log',
record_hook=server_job_record_hook(self))
+ # Initialize a flag to indicate DUT failure during the test, e.g.,
+ # unexpected reboot.
+ self.failed_with_device_error = False
+
@classmethod
def _find_base_directories(cls):
@@ -595,8 +599,8 @@
self._execute_code(server_control_file, namespace)
logging.info("Finished processing control file")
- # no error occured, so we don't need to collect crashinfo
- collect_crashinfo = False
+ # If no device error occurred, no need to collect crashinfo.
+ collect_crashinfo = self.failed_with_device_error
except Exception, e:
try:
logging.exception(