One-off fix for the issue where a scheduler shutdown immediately after a
special task leaves the HQE in an inconsistent state. Specifically, we
saw this when a cleanup failed and the scheduler shut down before the
associated repair started.
HQEs are now requeued after a failed cleanup/verify.
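
A minimal sketch of the new flow, for illustration only: the base class
and the results-copying step are assumed, and only the final conditional
mirrors the actual change in the diff below.

    class PreJobTask(AgentTask):  # base class assumed for illustration
        def epilog(self):
            super(PreJobTask, self).epilog()
            # ... copy results of the failed cleanup/verify (as in the diff) ...
            if not self.success and self.queue_entry:
                # Requeue immediately when the pre-job task fails, instead of
                # in RepairTask.prolog(), so a scheduler shutdown before the
                # repair starts can no longer strand the HQE.
                self.queue_entry.requeue()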
TODO: reimplement the scheduler to maintain less in-memory state by not
relying on storing an array of AgentTasks.
Risk: medium (scheduler change)
Visibility: medium (scheduler bug fix)
Signed-off-by: James Ren <jamesren@google.com>
git-svn-id: http://test.kernel.org/svn/autotest/trunk@3573 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index d761f1a..54871bc 100755
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py
@@ -1686,14 +1686,7 @@
protection = host_protections.Protection.get_attr_name(protection)
self.host = host
- self.queue_entry = None
- # recovery code can pass a HQE that's already been requeued. for a
- # metahost, that means the host has been unassigned. in that case,
- # ignore the HQE.
- hqe_still_assigned_to_this_host = (queue_entry and queue_entry.host
- and queue_entry.host.id == host.id)
- if hqe_still_assigned_to_this_host:
- self.queue_entry = queue_entry
+ self.queue_entry = queue_entry
super(RepairTask, self).__init__(
task, ['-R', '--host-protection', protection],
@@ -1708,8 +1701,6 @@
super(RepairTask, self).prolog()
logging.info("repair_task starting")
self.host.set_status('Repairing')
- if self.queue_entry:
- self.queue_entry.requeue()
def _keyval_path(self):
@@ -1770,6 +1761,9 @@
self.monitor.get_process(), source,
destination_path=destination)
+ if not self.success and self.queue_entry:
+ self.queue_entry.requeue()
+
class VerifyTask(PreJobTask):
TASK_TYPE = models.SpecialTask.Task.VERIFY