Implement periodic reverification of dead hosts, configurable in global_config.  This is implemented as part of the periodic cleanup, so reverification can run no more often than the periodic cleanup interval.  I felt this would be acceptable, and putting it in the existing cleanup class keeps things better organized.

Signed-off-by: Steve Howard <showard@google.com>


git-svn-id: http://test.kernel.org/svn/autotest/trunk@4100 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/scheduler/monitor_db_cleanup.py b/scheduler/monitor_db_cleanup.py
index dfe2075..3912aab 100644
--- a/scheduler/monitor_db_cleanup.py
+++ b/scheduler/monitor_db_cleanup.py
@@ -4,10 +4,10 @@
 
 
 import datetime, time, logging
-import common
 from autotest_lib.database import database_connection
 from autotest_lib.frontend.afe import models
 from autotest_lib.scheduler import email_manager, scheduler_config
+from autotest_lib.client.common_lib import host_protections
 
 
 class PeriodicCleanup(object):
@@ -46,6 +46,7 @@
 
     def __init__(self, db, clean_interval_minutes):
         super(UserCleanup, self).__init__(db, clean_interval_minutes)
+        self._last_reverify_time = time.time()  # start the reverify clock now
 
 
     def _cleanup(self):
@@ -54,6 +55,7 @@
         self._abort_jobs_past_max_runtime()
         self._clear_inactive_blocks()
         self._check_for_db_inconsistencies()
+        self._reverify_dead_hosts()
 
 
     def _abort_timed_out_jobs(self):
@@ -152,6 +154,36 @@
             USING (job_id) WHERE hqe.job_id IS NULL""")
 
 
+    def _should_reverify_hosts_now(self):  # has the reverify period elapsed?
+        reverify_period_sec = (scheduler_config.config.reverify_period_minutes
+                               * 60)  # minutes -> seconds
+        if reverify_period_sec == 0:  # a period of 0 never triggers reverify
+            return False
+        return (self._last_reverify_time + reverify_period_sec) <= time.time()
+
+
+    def _reverify_dead_hosts(self):  # create VERIFY tasks for dead hosts
+        if not self._should_reverify_hosts_now():
+            return
+
+        self._last_reverify_time = time.time()  # reset even if no hosts match
+        logging.info('Checking for dead hosts to reverify')
+        hosts = models.Host.objects.filter(
+                status=models.Host.Status.REPAIR_FAILED,
+                locked=False,
+                invalid=False)
+        hosts = hosts.exclude(  # skip hosts whose protection is DO_NOT_VERIFY
+                protection=host_protections.Protection.DO_NOT_VERIFY)
+        if not hosts:
+            return
+
+        logging.info('Reverifying dead hosts %s'
+                     % ', '.join(host.hostname for host in hosts))
+        for host in hosts:  # one VERIFY special task per matching host
+            models.SpecialTask.objects.create(
+                    host=host, task=models.SpecialTask.Task.VERIFY)
+
+
 class TwentyFourHourUpkeep(PeriodicCleanup):
     """Cleanup that runs at the startup of monitor_db and every subsequent
        twenty four hours.