[autotest] Record exceptional job termination correctly
Ensure that exceptions that occur during Reimager.attempt get caught and
logged, so that we can record 'END ERROR' appropriately.
BUG=chromium-os:26550
TEST=new unit test
TEST=manually ran a job with a doomed-to-fail RPC in it
STATUS=Fixed
Change-Id: Ieb076c14424cb7699edfdb88079ea2b43de279e0
Reviewed-on: https://gerrit.chromium.org/gerrit/16441
Commit-Ready: Chris Masone <cmasone@chromium.org>
Reviewed-by: Chris Masone <cmasone@chromium.org>
Tested-by: Chris Masone <cmasone@chromium.org>
diff --git a/server/cros/dynamic_suite.py b/server/cros/dynamic_suite.py
index ed023fa..e0fd6d0 100644
--- a/server/cros/dynamic_suite.py
+++ b/server/cros/dynamic_suite.py
@@ -84,11 +84,11 @@
x86-alex-release/R18-1655.0.0-a1-b1584.
@param board: which kind of devices to reimage.
@param record: callable that records job status.
- prototype:
- record(status, subdir, name, reason)
+ prototype:
+ record(status, subdir, name, reason)
@param num: how many devices to reimage.
@param pool: Specify the pool of machines to use for scheduling
- purposes.
+ purposes.
@return True if all reimaging jobs succeed, false otherwise.
"""
if not num:
@@ -98,16 +98,22 @@
logging.debug("scheduling reimaging across %d machines", num)
wrapper_job_name = 'try new image'
record('START', None, wrapper_job_name)
- self._ensure_version_label(VERSION_PREFIX + build)
- canary = self._schedule_reimage_job(build, num, board)
- logging.debug('Created re-imaging job: %d', canary.id)
- while len(self._afe.get_jobs(id=canary.id, not_yet_run=True)) > 0:
- time.sleep(10)
- logging.debug('Re-imaging job running.')
- while len(self._afe.get_jobs(id=canary.id, finished=True)) == 0:
- time.sleep(10)
- logging.debug('Re-imaging job finished.')
- canary.result = self._afe.poll_job_results(self._tko, canary, 0)
+ try:
+ self._ensure_version_label(VERSION_PREFIX + build)
+ canary = self._schedule_reimage_job(build, num, board)
+ logging.debug('Created re-imaging job: %d', canary.id)
+ while len(self._afe.get_jobs(id=canary.id, not_yet_run=True)) > 0:
+ time.sleep(10)
+ logging.debug('Re-imaging job running.')
+ while len(self._afe.get_jobs(id=canary.id, finished=True)) == 0:
+ time.sleep(10)
+ logging.debug('Re-imaging job finished.')
+ canary.result = self._afe.poll_job_results(self._tko, canary, 0)
+ except Exception as e:
+ # catch Exception so we record the job as terminated no matter what.
+ logging.error(e)
+ record('END ERROR', None, wrapper_job_name, str(e))
+ return False
if canary.result is True:
self._report_results(canary, record)
diff --git a/server/cros/dynamic_suite_unittest.py b/server/cros/dynamic_suite_unittest.py
index 2eb934e..04ca80a 100755
--- a/server/cros/dynamic_suite_unittest.py
+++ b/server/cros/dynamic_suite_unittest.py
@@ -167,10 +167,11 @@
self.reimager._schedule_reimage_job(self._BUILD, self._NUM, self._BOARD)
- def expect_attempt(self, success):
+ def expect_attempt(self, success, ex=None):
"""Sets up |self.reimager| to expect an attempt() that returns |success|
- @param success the value returned by poll_job_results()
+ @param success: the value returned by poll_job_results()
+ @param ex: if not None, |ex| is raised by get_jobs()
@return a FakeJob configured with appropriate expectations
"""
canary = FakeJob()
@@ -186,8 +187,12 @@
self.reimager._report_results(canary, mox.IgnoreArg())
self.afe.get_jobs(id=canary.id, not_yet_run=True).AndReturn([])
- self.afe.get_jobs(id=canary.id, finished=True).AndReturn([canary])
- self.afe.poll_job_results(mox.IgnoreArg(), canary, 0).AndReturn(success)
+ if ex is not None:
+ self.afe.get_jobs(id=canary.id, finished=True).AndRaise(ex)
+ else:
+ self.afe.get_jobs(id=canary.id, finished=True).AndReturn([canary])
+ self.afe.poll_job_results(mox.IgnoreArg(),
+ canary, 0).AndReturn(success)
return canary
@@ -226,6 +231,18 @@
self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+ def testReimageThatRaised(self):
+ """Should attempt a reimage that raises an exception and record that."""
+ ex_message = 'Oh no!'
+ canary = self.expect_attempt(None, Exception(ex_message))
+
+ rjob = self.mox.CreateMock(base_job.base_job)
+ rjob.record('START', mox.IgnoreArg(), mox.IgnoreArg())
+ rjob.record('END ERROR', mox.IgnoreArg(), mox.IgnoreArg(), ex_message)
+ self.mox.ReplayAll()
+ self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+
+
class SuiteTest(mox.MoxTestBase):
"""Unit tests for dynamic_suite.Suite.