[autotest] Clear up host attributes and labels after suite run

When we reimage a device, we label it with the build we put on there,
and then attach a host-attribute indicating where autotest should
search for packages at test time.

It behooves us to clean up after ourselves.

BUG=chromium-os:27226
TEST=./site_utils/run_suite.py -b x86-mario -i x86-mario-release/R19-1916.0.0-a1-b1736 -s dummy
TEST=After the suite runs, check on the autotest front end to ensure
TEST=that the hosts used don't have any cros-version-* labels on them.
STATUS=Fixed

Change-Id: Ieda46de291b1bfb487d2454cc65c71aecd7abead
Reviewed-on: https://gerrit.chromium.org/gerrit/17537
Reviewed-by: Scott Zawalski <scottz@chromium.org>
Commit-Ready: Chris Masone <cmasone@chromium.org>
Tested-by: Chris Masone <cmasone@chromium.org>
diff --git a/server/cros/dynamic_suite.py b/server/cros/dynamic_suite.py
index dc8d7cb..74f91cd 100644
--- a/server/cros/dynamic_suite.py
+++ b/server/cros/dynamic_suite.py
@@ -57,11 +57,14 @@
     if pool:
         pool = 'pool:%s' % pool
     reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)
+
     if skip_reimage or reimager.attempt(build, board, job.record, num=num):
         suite = Suite.create_from_name(name, build, pool=pool,
                                        results_dir=job.resultdir)
         suite.run_and_wait(job.record, add_experimental=add_experimental)
 
+    reimager.clear_reimaged_host_state(build)
+
 
 def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
                               pool=None, num=None, skip_reimage=False,
@@ -162,6 +165,7 @@
                                                          debug=False)
         self._pool = pool
         self._results_dir = results_dir
+        self._reimaged_hosts = {}
         self._cf_getter = control_file_getter.FileSystemGetter(
             [os.path.join(autotest_dir, 'server/site_tests')])
 
@@ -194,21 +198,25 @@
         wrapper_job_name = 'try_new_image'
         record('START', None, wrapper_job_name)
         try:
+            # Determine if there are enough working hosts to run on.
             labels = [l for l in [board, self._pool] if l is not None]
             if num > self._count_usable_hosts(labels):
                 raise InadequateHostsException("Too few hosts with %r" % labels)
 
+            # Schedule job and record job metadata.
             self._ensure_version_label(VERSION_PREFIX + build)
-            canary = self._schedule_reimage_job(build, num, board)
-            self._record_job_if_possible(wrapper_job_name, canary)
-            logging.debug('Created re-imaging job: %d', canary.id)
-            while len(self._afe.get_jobs(id=canary.id, not_yet_run=True)) > 0:
-                time.sleep(10)
-            logging.debug('Re-imaging job running.')
-            while len(self._afe.get_jobs(id=canary.id, finished=True)) == 0:
-                time.sleep(10)
-            logging.debug('Re-imaging job finished.')
-            canary.result = self._afe.poll_job_results(self._tko, canary, 0)
+            canary_job = self._schedule_reimage_job(build, num, board)
+            self._record_job_if_possible(wrapper_job_name, canary_job)
+            logging.debug('Created re-imaging job: %d', canary_job.id)
+
+            # Poll until reimaging is complete.
+            self._wait_for_job_to_start(canary_job.id)
+            self._wait_for_job_to_finish(canary_job.id)
+
+            # Gather job results.
+            canary_job.result = self._afe.poll_job_results(self._tko,
+                                                           canary_job,
+                                                           0)
         except InadequateHostsException as e:
             logging.warning(e)
             record('END WARN', None, wrapper_job_name, str(e))
@@ -219,20 +227,88 @@
             record('END ERROR', None, wrapper_job_name, str(e))
             return False
 
-        if canary.result is True:
-            self._report_results(canary, record)
+        self._remember_reimaged_hosts(build, canary_job)
+
+        if canary_job.result is True:
+            self._report_results(canary_job, record)
             record('END GOOD', None, wrapper_job_name)
             return True
 
-        if canary.result is None:
-            record('FAIL', None, canary.name, 're-imaging tasks did not run')
-        else:  # canary.result is False
-            self._report_results(canary, record)
+        if canary_job.result is None:
+            record('FAIL', None, canary_job.name, 'reimaging tasks did not run')
+        else:  # canary_job.result is False
+            self._report_results(canary_job, record)
 
         record('END FAIL', None, wrapper_job_name)
         return False
 
 
+    def _wait_for_job_to_start(self, job_id):
+        """
+        Wait for the job specified by |job_id| to start.
+
+        @param job_id: the job ID to poll on.
+        """
+        while len(self._afe.get_jobs(id=job_id, not_yet_run=True)) > 0:
+            time.sleep(10)
+        logging.debug('Re-imaging job running.')
+
+
+    def _wait_for_job_to_finish(self, job_id):
+        """
+        Wait for the job specified by |job_id| to finish.
+
+        @param job_id: the job ID to poll on.
+        """
+        while len(self._afe.get_jobs(id=job_id, finished=True)) == 0:
+            time.sleep(10)
+        logging.debug('Re-imaging job finished.')
+
+
+    def _remember_reimaged_hosts(self, build, canary_job):
+        """
+        Remember hosts that were reimaged with |build| as part of |canary_job|.
+
+        @param build: the build that was installed e.g.
+                      x86-alex-release/R18-1655.0.0-a1-b1584.
+        @param canary_job: a completed frontend.Job object, possibly populated
+                           by frontend.AFE.poll_job_results.
+        """
+        if not hasattr(canary_job, 'results_platform_map'):
+            return
+        if not self._reimaged_hosts.get(build):
+            self._reimaged_hosts[build] = []
+        for platform in canary_job.results_platform_map:
+            for host in canary_job.results_platform_map[platform]['Total']:
+                self._reimaged_hosts[build].append(host)
+
+
+    def clear_reimaged_host_state(self, build):
+        """
+        Clear per-host state created in the autotest DB for this job.
+
+        After reimaging a host, we label it and set some host attributes on it
+        that are then used by the suite scheduling code.  This call cleans
+        that up.
+
+        @param build: the build whose hosts we want to clean up e.g.
+                      x86-alex-release/R18-1655.0.0-a1-b1584.
+        """
+        labels = self._afe.get_labels(name__startswith=VERSION_PREFIX + build)
+        for label in labels: self._afe.run('delete_label', id=label.id)
+        for host in self._reimaged_hosts.get(build, []):
+            self._clear_build_state(host)
+
+
+    def _clear_build_state(self, machine):
+        """
+        Clear all build-specific labels, attributes from the target.
+
+        @param machine: the host to clear labels, attributes from.
+        """
+        self._afe.set_host_attribute('job_repo_url', None, hostname=machine)
+
+
     def _record_job_if_possible(self, test_name, job):
         """
         Record job id as keyval, if possible, so it can be referenced later.
diff --git a/server/cros/dynamic_suite_unittest.py b/server/cros/dynamic_suite_unittest.py
index f276688..311f678 100755
--- a/server/cros/dynamic_suite_unittest.py
+++ b/server/cros/dynamic_suite_unittest.py
@@ -8,6 +8,7 @@
 
 import logging
 import mox
+import random
 import shutil
 import tempfile
 import time
@@ -21,6 +22,7 @@
     """Faked out RPC-client-side Job object."""
     def __init__(self, id=0, statuses=[]):
         self.id = id
+        self.hostname = 'host%d' % id
         self.owner = 'tester'
         self.name = 'Fake Job %d' % self.id
         self.statuses = statuses
@@ -31,6 +33,11 @@
     def __init__(self, status='Ready'):
         self.status = status
 
+class FakeLabel(object):
+    """Faked out RPC-client-side Label object."""
+    def __init__(self, id=0):
+        self.id = id
+
 
 class DynamicSuiteTest(mox.MoxTestBase):
     """Unit tests for dynamic_suite module methods.
@@ -290,9 +297,23 @@
         self.reimager._schedule_reimage_job(self._BUILD, self._NUM, self._BOARD)
 
 
+    def expect_label_cleanup(self, build):
+        """Sets up |self.afe| to expect deletion of the version label.
+
+        @param build: the build the label is named after.
+        """
+        label = FakeLabel(id=random.randrange(0, 5))
+        self.afe.get_labels(
+            name__startswith=mox.StrContains(build)).AndReturn([label])
+        self.afe.run('delete_label', id=label.id)
+
+
     def expect_attempt(self, success, ex=None):
         """Sets up |self.reimager| to expect an attempt() that returns |success|
 
+        Also stubs out Reimager._clear_build_state(), should the caller wish
+        to set an expectation there as well.
+
         @param success: the value returned by poll_job_results()
         @param ex: if not None, |ex| is raised by get_jobs()
         @return a FakeJob configured with appropriate expectations
@@ -312,6 +333,8 @@
         if success is not None:
             self.mox.StubOutWithMock(self.reimager, '_report_results')
             self.reimager._report_results(canary, mox.IgnoreArg())
+            canary.results_platform_map = {None: {'Total': [canary.hostname]}}
+
 
         self.afe.get_jobs(id=canary.id, not_yet_run=True).AndReturn([])
         if ex is not None:
@@ -321,6 +344,9 @@
             self.afe.poll_job_results(mox.IgnoreArg(),
                                       canary, 0).AndReturn(success)
 
+        self.expect_label_cleanup(self._BUILD)
+        self.mox.StubOutWithMock(self.reimager, '_clear_build_state')
+
         return canary
 
 
@@ -331,8 +357,10 @@
         rjob = self.mox.CreateMock(base_job.base_job)
         rjob.record('START', mox.IgnoreArg(), mox.IgnoreArg())
         rjob.record('END GOOD', mox.IgnoreArg(), mox.IgnoreArg())
+        self.reimager._clear_build_state(mox.StrContains(canary.hostname))
         self.mox.ReplayAll()
         self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+        self.reimager.clear_reimaged_host_state(self._BUILD)
 
 
     def testFailedReimage(self):
@@ -342,8 +370,10 @@
         rjob = self.mox.CreateMock(base_job.base_job)
         rjob.record('START', mox.IgnoreArg(), mox.IgnoreArg())
         rjob.record('END FAIL', mox.IgnoreArg(), mox.IgnoreArg())
+        self.reimager._clear_build_state(mox.StrContains(canary.hostname))
         self.mox.ReplayAll()
         self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+        self.reimager.clear_reimaged_host_state(self._BUILD)
 
 
     def testReimageThatNeverHappened(self):
@@ -356,6 +386,7 @@
         rjob.record('END FAIL', mox.IgnoreArg(), mox.IgnoreArg())
         self.mox.ReplayAll()
         self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+        self.reimager.clear_reimaged_host_state(self._BUILD)
 
 
     def testReimageThatRaised(self):
@@ -368,6 +399,7 @@
         rjob.record('END ERROR', mox.IgnoreArg(), mox.IgnoreArg(), ex_message)
         self.mox.ReplayAll()
         self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+        self.reimager.clear_reimaged_host_state(self._BUILD)
 
 
     def testReimageThatCouldNotSchedule(self):
@@ -381,8 +413,10 @@
         rjob.record('START', mox.IgnoreArg(), mox.IgnoreArg())
         rjob.record('END WARN', mox.IgnoreArg(), mox.IgnoreArg(),
                     mox.StrContains('Too few hosts'))
+        self.expect_label_cleanup(self._BUILD)
         self.mox.ReplayAll()
         self.reimager.attempt(self._BUILD, self._BOARD, rjob.record)
+        self.reimager.clear_reimaged_host_state(self._BUILD)
 
 
 class SuiteTest(mox.MoxTestBase):
diff --git a/server/site_tests/autoupdate/control b/server/site_tests/autoupdate/control
index 578dfdc..2b0f27d 100644
--- a/server/site_tests/autoupdate/control
+++ b/server/site_tests/autoupdate/control
@@ -21,7 +21,7 @@
 
 from autotest_lib.server import frontend
 
-vers = 'cros-version-'
+vers = 'cros-version:'
 repo_url = None
 if 'image_name' in locals():
     from autotest_lib.server.cros import dynamic_suite
@@ -31,6 +31,14 @@
 AFE = frontend.AFE(debug=False)
 
 def clear_version_labels(machine):
+    """Clear all build-specific labels, attributes from the target.
+
+    Copied from server/cros/dynamic_suite.py, because we can't be sure that
+    code will be available in all contexts in which this control file is
+    currently used.
+
+    @param machine: the host to clear labels, attributes from.
+    """
     labels = AFE.get_labels(name__startswith=vers)
     for label in labels: label.remove_hosts(hosts=[machine])
     AFE.set_host_attribute('job_repo_url', None, hostname=machine)