[autotest] Clear up host attributes and labels after suite run
When we reimage a device, we label it with the build we put on there,
and then attach a host-attribute indicating where autotest should
search for packages at test time.
It behooves us to clean up after ourselves.
BUG=chromium-os:27226
TEST=./site_utils/run_suite.py -b x86-mario -i x86-mario-release/R19-1916.0.0-a1-b1736 -s dummy
TEST=After the suite runs, check on the autotest front end to ensure
TEST=that the hosts used don't have any cros-version-* labels on them.
STATUS=Fixed
Change-Id: Ieda46de291b1bfb487d2454cc65c71aecd7abead
Reviewed-on: https://gerrit.chromium.org/gerrit/17537
Reviewed-by: Scott Zawalski <scottz@chromium.org>
Commit-Ready: Chris Masone <cmasone@chromium.org>
Tested-by: Chris Masone <cmasone@chromium.org>
diff --git a/server/cros/dynamic_suite.py b/server/cros/dynamic_suite.py
index dc8d7cb..74f91cd 100644
--- a/server/cros/dynamic_suite.py
+++ b/server/cros/dynamic_suite.py
@@ -57,11 +57,14 @@
if pool:
pool = 'pool:%s' % pool
reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)
+
if skip_reimage or reimager.attempt(build, board, job.record, num=num):
suite = Suite.create_from_name(name, build, pool=pool,
results_dir=job.resultdir)
suite.run_and_wait(job.record, add_experimental=add_experimental)
+ reimager.clear_reimaged_host_state(build)
+
def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
pool=None, num=None, skip_reimage=False,
@@ -162,6 +165,7 @@
debug=False)
self._pool = pool
self._results_dir = results_dir
+ self._reimaged_hosts = {}
self._cf_getter = control_file_getter.FileSystemGetter(
[os.path.join(autotest_dir, 'server/site_tests')])
@@ -194,21 +198,25 @@
wrapper_job_name = 'try_new_image'
record('START', None, wrapper_job_name)
try:
+ # Determine if there are enough working hosts to run on.
labels = [l for l in [board, self._pool] if l is not None]
if num > self._count_usable_hosts(labels):
raise InadequateHostsException("Too few hosts with %r" % labels)
+ # Schedule job and record job metadata.
self._ensure_version_label(VERSION_PREFIX + build)
- canary = self._schedule_reimage_job(build, num, board)
- self._record_job_if_possible(wrapper_job_name, canary)
- logging.debug('Created re-imaging job: %d', canary.id)
- while len(self._afe.get_jobs(id=canary.id, not_yet_run=True)) > 0:
- time.sleep(10)
- logging.debug('Re-imaging job running.')
- while len(self._afe.get_jobs(id=canary.id, finished=True)) == 0:
- time.sleep(10)
- logging.debug('Re-imaging job finished.')
- canary.result = self._afe.poll_job_results(self._tko, canary, 0)
+ canary_job = self._schedule_reimage_job(build, num, board)
+ self._record_job_if_possible(wrapper_job_name, canary_job)
+ logging.debug('Created re-imaging job: %d', canary_job.id)
+
+ # Poll until reimaging is complete.
+ self._wait_for_job_to_start(canary_job.id)
+ self._wait_for_job_to_finish(canary_job.id)
+
+ # Gather job results.
+ canary_job.result = self._afe.poll_job_results(self._tko,
+ canary_job,
+ 0)
except InadequateHostsException as e:
logging.warning(e)
record('END WARN', None, wrapper_job_name, str(e))
@@ -219,20 +227,88 @@
record('END ERROR', None, wrapper_job_name, str(e))
return False
- if canary.result is True:
- self._report_results(canary, record)
+ self._remember_reimaged_hosts(build, canary_job)
+
+ if canary_job.result is True:
+ self._report_results(canary_job, record)
record('END GOOD', None, wrapper_job_name)
return True
- if canary.result is None:
- record('FAIL', None, canary.name, 're-imaging tasks did not run')
- else: # canary.result is False
- self._report_results(canary, record)
+ if canary_job.result is None:
+ record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
+ else: # canary_job.result is False
+ self._report_results(canary_job, record)
record('END FAIL', None, wrapper_job_name)
return False
+    def _wait_for_job_to_start(self, job_id):
+        """
+        Block until the AFE stops reporting job |job_id| as not yet run.
+
+        Polls every 10 seconds; there is no timeout.
+
+        @param job_id: the job ID to poll on.
+        """
+        while True:
+            pending = self._afe.get_jobs(id=job_id, not_yet_run=True)
+            if len(pending) == 0:
+                break
+            time.sleep(10)
+        logging.debug('Re-imaging job running.')
+
+
+    def _wait_for_job_to_finish(self, job_id):
+        """
+        Block until the AFE reports job |job_id| as finished.
+
+        Polls every 10 seconds; there is no timeout.
+
+        @param job_id: the job ID to poll on.
+        """
+        while not self._afe.get_jobs(id=job_id, finished=True):
+            time.sleep(10)
+        logging.debug('Re-imaging job finished.')
+
+
+    def _remember_reimaged_hosts(self, build, canary_job):
+        """
+        Remember hosts that were reimaged with |build| as part of |canary_job|.
+
+        Hosts are appended to the list kept for |build| in
+        self._reimaged_hosts, so repeated calls for the same build accumulate.
+        Does nothing if |canary_job| has no results_platform_map attribute.
+
+        @param build: the build that was installed e.g.
+                      x86-alex-release/R18-1655.0.0-a1-b1584.
+        @param canary_job: a completed frontend.Job object, possibly populated
+                           by frontend.AFE.poll_job_results.
+        """
+        if not hasattr(canary_job, 'results_platform_map'):
+            return
+        # Key on the |build| argument, not the literal string 'build';
+        # the literal never matched, so the list was reset on every call.
+        hosts = self._reimaged_hosts.setdefault(build, [])
+        for platform_results in canary_job.results_platform_map.values():
+            hosts.extend(platform_results['Total'])
+
+
+    def clear_reimaged_host_state(self, build):
+        """
+        Clear per-host state created in the autotest DB for this job.
+
+        After reimaging a host, we label it and set some host attributes on it
+        that are then used by the suite scheduling code.  This call deletes
+        every label whose name starts with VERSION_PREFIX + |build| and then
+        clears build state on each host remembered for |build|.
+
+        @param build: the build whose hosts we want to clean up e.g.
+                      x86-alex-release/R18-1655.0.0-a1-b1584.
+        """
+        labels = self._afe.get_labels(name__startswith=VERSION_PREFIX + build)
+        for label in labels:
+            self._afe.run('delete_label', id=label.id)
+        # Look up by the |build| argument; the literal key 'build' is never
+        # populated, so no host ever had its build state cleared.
+        for host in self._reimaged_hosts.get(build, []):
+            self._clear_build_state(host)
+
+
+    def _clear_build_state(self, machine):
+        """
+        Reset the build-specific 'job_repo_url' host attribute on |machine|.
+
+        @param machine: the host to clear the attribute from.
+        """
+        attribute, cleared = 'job_repo_url', None
+        self._afe.set_host_attribute(attribute, cleared, hostname=machine)
+
+
def _record_job_if_possible(self, test_name, job):
"""
Record job id as keyval, if possible, so it can be referenced later.
diff --git a/server/cros/dynamic_suite_unittest.py b/server/cros/dynamic_suite_unittest.py
index f276688..311f678 100755
--- a/server/cros/dynamic_suite_unittest.py
+++ b/server/cros/dynamic_suite_unittest.py
@@ -8,6 +8,7 @@
import logging
import mox
+import random
import shutil
import tempfile
import time
@@ -21,6 +22,7 @@
"""Faked out RPC-client-side Job object."""
def __init__(self, id=0, statuses=[]):
self.id = id
+ self.hostname = 'host%d' % id
self.owner = 'tester'
self.name = 'Fake Job %d' % self.id
self.statuses = statuses
@@ -31,6 +33,11 @@
def __init__(self, status='Ready'):
self.status = status
+class FakeLabel(object):
+    """Minimal stand-in for the RPC-client-side Label object.
+
+    Carries only the |id| attribute.
+    """
+    def __init__(self, id=0):
+        self.id = id
+
class DynamicSuiteTest(mox.MoxTestBase):
"""Unit tests for dynamic_suite module methods.
@@ -290,9 +297,23 @@
self.reimager._schedule_reimage_job(self._BUILD, self._NUM, self._BOARD)
+    def expect_label_cleanup(self, build):
+        """Record the |self.afe| calls expected when the version label is deleted.
+
+        Expects a label lookup whose name prefix contains |build|, returning
+        one fake label, followed by a 'delete_label' call for that label's id.
+
+        @param build: the build the label is named after.
+        """
+        fake_label = FakeLabel(id=random.randrange(0, 5))
+        name_matcher = mox.StrContains(build)
+        lookup = self.afe.get_labels(name__startswith=name_matcher)
+        lookup.AndReturn([fake_label])
+        self.afe.run('delete_label', id=fake_label.id)
+
+
def expect_attempt(self, success, ex=None):
"""Sets up |self.reimager| to expect an attempt() that returns |success|
+ Also stubs out Reimager._clear_build_state(), should the caller wish
+ to set an expectation there as well.
+
@param success: the value returned by poll_job_results()
@param ex: if not None, |ex| is raised by get_jobs()
@return a FakeJob configured with appropriate expectations
@@ -312,6 +333,8 @@
if success is not None:
self.mox.StubOutWithMock(self.reimager, '_report_results')
self.reimager._report_results(canary, mox.IgnoreArg())
+ canary.results_platform_map = {None: {'Total': [canary.hostname]}}
+
self.afe.get_jobs(id=canary.id, not_yet_run=True).AndReturn([])
if ex is not None:
@@ -321,6 +344,9 @@
self.afe.poll_job_results(mox.IgnoreArg(),
canary, 0).AndReturn(success)
+ self.expect_label_cleanup(self._BUILD)
+ self.mox.StubOutWithMock(self.reimager, '_clear_build_state')
+
return canary
@@ -331,8 +357,10 @@
rjob = self.mox.CreateMock(base_job.base_job)
rjob.record('START', mox.IgnoreArg(), mox.IgnoreArg())
rjob.record('END GOOD', mox.IgnoreArg(), mox.IgnoreArg())
+ self.reimager._clear_build_state(mox.StrContains(canary.hostname))
self.mox.ReplayAll()
self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+ self.reimager.clear_reimaged_host_state(self._BUILD)
def testFailedReimage(self):
@@ -342,8 +370,10 @@
rjob = self.mox.CreateMock(base_job.base_job)
rjob.record('START', mox.IgnoreArg(), mox.IgnoreArg())
rjob.record('END FAIL', mox.IgnoreArg(), mox.IgnoreArg())
+ self.reimager._clear_build_state(mox.StrContains(canary.hostname))
self.mox.ReplayAll()
self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+ self.reimager.clear_reimaged_host_state(self._BUILD)
def testReimageThatNeverHappened(self):
@@ -356,6 +386,7 @@
rjob.record('END FAIL', mox.IgnoreArg(), mox.IgnoreArg())
self.mox.ReplayAll()
self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+ self.reimager.clear_reimaged_host_state(self._BUILD)
def testReimageThatRaised(self):
@@ -368,6 +399,7 @@
rjob.record('END ERROR', mox.IgnoreArg(), mox.IgnoreArg(), ex_message)
self.mox.ReplayAll()
self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+ self.reimager.clear_reimaged_host_state(self._BUILD)
def testReimageThatCouldNotSchedule(self):
@@ -381,8 +413,10 @@
rjob.record('START', mox.IgnoreArg(), mox.IgnoreArg())
rjob.record('END WARN', mox.IgnoreArg(), mox.IgnoreArg(),
mox.StrContains('Too few hosts'))
+ self.expect_label_cleanup(self._BUILD)
self.mox.ReplayAll()
self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+ self.reimager.clear_reimaged_host_state(self._BUILD)
class SuiteTest(mox.MoxTestBase):
diff --git a/server/site_tests/autoupdate/control b/server/site_tests/autoupdate/control
index 578dfdc..2b0f27d 100644
--- a/server/site_tests/autoupdate/control
+++ b/server/site_tests/autoupdate/control
@@ -21,7 +21,7 @@
from autotest_lib.server import frontend
-vers = 'cros-version-'
+vers = 'cros-version:'
repo_url = None
if 'image_name' in locals():
from autotest_lib.server.cros import dynamic_suite
@@ -31,6 +31,14 @@
AFE = frontend.AFE(debug=False)
def clear_version_labels(machine):
+ """Clear all build-specific labels, attributes from the target.
+
+ Copied from server/cros/dynamic_suite.py, because we can't be sure that
+ code will be available in all contexts in which this control file is
+ currently used.
+
+ @param machine: the host to clear labels, attributes from.
+ """
labels = AFE.get_labels(name__startswith=vers)
for label in labels: label.remove_hosts(hosts=[machine])
AFE.set_host_attribute('job_repo_url', None, hostname=machine)