[autotest] Pipe check_hosts parameter down into dynamic_suite

The builders need to schedule a test suite and then wait for it to
complete.  They want to verify up front that the suite can actually be
scheduled, and error out if not.  That's great.  For suites that we
schedule once a day, or asynchronously in some other way, we don't
want to bail just because the testbed has too many hosts that need
Repair.  We want to fire and forget.

To support these two modes of operation, pipe a 'check_hosts' parameter
from the create_suite_job() RPC, down through the control files, to
dynamic_suite.reimage_and_run().  Ensure that reimage_and_run()
tolerates check_hosts being left unset (it defaults to True).  Also,
make 'atest suite create' able to toggle this value.
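
For illustration, the tail of a suite control file might thread the new
parameter through like this (a minimal sketch: the suite name, variable
plumbing, and surrounding control-file boilerplate are assumptions; only
the reimage_and_run() signature matches the diff below):

    from autotest_lib.server.cros import dynamic_suite

    # 'check_hosts' is expected to arrive via the variables that
    # inject_vars() splices into the control file.  Builders pass True
    # to fail fast when the testbed lacks usable hosts; daily or
    # otherwise-asynchronous schedulers pass False to fire and forget.
    # It defaults to True if left unset.
    dynamic_suite.reimage_and_run(
        build=build, board=board, name='bvt', job=job,
        pool=pool, num=num, check_hosts=check_hosts)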

BUG=chromium-os:27526
TEST=dynamic_suite_unittest.py, site_rpc_interface_unittest.py
TEST=./server/autoserv test_suites/dev_harness with check_hosts set both ways.
TEST=install the patch on an autotest instance, and re-run a test suite against the new interface.  The suite should check for hosts and behave appropriately.

Change-Id: I10c3f42dbc37f26d7af3c40439ce212ebf74cfcd
Reviewed-on: https://gerrit.chromium.org/gerrit/17633
Tested-by: Chris Masone <cmasone@chromium.org>
Reviewed-by: Scott Zawalski <scottz@chromium.org>
Commit-Ready: Chris Masone <cmasone@chromium.org>
diff --git a/server/cros/dynamic_suite.py b/server/cros/dynamic_suite.py
index 74f91cd..5d114b3 100644
--- a/server/cros/dynamic_suite.py
+++ b/server/cros/dynamic_suite.py
@@ -46,19 +46,21 @@
                  Default: None
     @param num: how many devices to reimage.
                 Default in global_config
+    @param check_hosts: require appropriate hosts to be available now.
     @param skip_reimage: skip reimaging, used for testing purposes.
                          Default: False
     @param add_experimental: schedule experimental tests as well, or not.
                              Default: True
     """
-    build, board, name, job, pool, num, skip_reimage, add_experimental = \
-        _vet_reimage_and_run_args(**dargs)
+    (build, board, name, job, pool, num, check_hosts, skip_reimage,
+     add_experimental) = _vet_reimage_and_run_args(**dargs)
     board = 'board:%s' % board
     if pool:
         pool = 'pool:%s' % pool
     reimager = Reimager(job.autodir, pool=pool, results_dir=job.resultdir)
 
-    if skip_reimage or reimager.attempt(build, board, job.record, num=num):
+    if skip_reimage or reimager.attempt(build, board, job.record, check_hosts,
+                                        num=num):
         suite = Suite.create_from_name(name, build, pool=pool,
                                        results_dir=job.resultdir)
         suite.run_and_wait(job.record, add_experimental=add_experimental)
@@ -67,8 +69,9 @@
 
 
 def _vet_reimage_and_run_args(build=None, board=None, name=None, job=None,
-                              pool=None, num=None, skip_reimage=False,
-                              add_experimental=True, **dargs):
+                              pool=None, num=None, check_hosts=True,
+                              skip_reimage=False, add_experimental=True,
+                              **dargs):
     """
     Vets arguments for reimage_and_run().
 
@@ -85,6 +88,7 @@
                  Default: None
     @param num: how many devices to reimage.
                 Default in global_config
+    @param check_hosts: require appropriate hosts to be available now.
     @param skip_reimage: skip reimaging, used for testing purposes.
                          Default: False
     @param add_experimental: schedule experimental tests as well, or not.
@@ -100,7 +104,8 @@
         if not value or not isinstance(value, expected):
             raise SuiteArgumentException("reimage_and_run() needs %s=<%r>" % (
                 key, expected))
-    return build, board, name, job, pool, num, skip_reimage, add_experimental
+    return (build, board, name, job, pool, num, check_hosts, skip_reimage,
+            add_experimental)
 
 
 def inject_vars(vars, control_file_in):
@@ -175,7 +180,7 @@
         return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']
 
 
-    def attempt(self, build, board, record, num=None):
+    def attempt(self, build, board, record, check_hosts, num=None):
         """
         Synchronously attempt to reimage some machines.
 
@@ -189,6 +194,7 @@
         @param record: callable that records job status.
                        prototype:
                          record(status, subdir, name, reason)
+        @param check_hosts: require appropriate hosts to be available now.
         @param num: how many devices to reimage.
         @return True if all reimaging jobs succeed, false otherwise.
         """
@@ -198,13 +204,12 @@
         wrapper_job_name = 'try_new_image'
         record('START', None, wrapper_job_name)
         try:
-            # Determine if there are enough working hosts to run on.
-            labels = [l for l in [board, self._pool] if l is not None]
-            if num > self._count_usable_hosts(labels):
-                raise InadequateHostsException("Too few hosts with %r" % labels)
+            self._ensure_version_label(VERSION_PREFIX + build)
+
+            if check_hosts:
+                self._ensure_enough_hosts(board, self._pool, num)
 
             # Schedule job and record job metadata.
-            self._ensure_version_label(VERSION_PREFIX + build)
             canary_job = self._schedule_reimage_job(build, num, board)
             self._record_job_if_possible(wrapper_job_name, canary_job)
             logging.debug('Created re-imaging job: %d', canary_job.id)
@@ -243,6 +248,22 @@
         return False
 
 
+    def _ensure_enough_hosts(self, board, pool, num):
+        """
+        Determine if there are enough working hosts to run on.
+
+        Raises exception if there are not enough hosts.
+
+        @param board: which kind of devices to reimage.
+        @param pool: the pool of machines to use for scheduling purposes.
+        @param num: how many devices to reimage.
+        @raises InadequateHostsException: if too few working hosts.
+        """
+        labels = [l for l in [board, pool] if l is not None]
+        if num > self._count_usable_hosts(labels):
+            raise InadequateHostsException('Too few hosts with %r' % labels)
+
+
     def _wait_for_job_to_start(self, job_id):
         """
         Wait for the job specified by |job_id| to start.