Ensure RepairTasks aren't associated with the queue entries that spawned them, so that if the queue entry (QE) is aborted during repair, the repair task continues running (and just leaves the QE alone from then on).
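
Roughly, the intended abort semantics (a minimal hypothetical sketch, not
the real monitor_db classes -- this Agent and abort_for_queue_entry are
simplified stand-ins):

    # Hypothetical simplification of the dispatcher's abort matching.
    class Agent(object):
        def __init__(self, tasks, queue_entry_ids=()):
            self.tasks = tasks
            # Failure tasks now run in a fresh Agent that registers no
            # queue entry IDs, so the abort loop below never matches it.
            self.queue_entry_ids = queue_entry_ids
            self.aborted = False

        def abort(self):
            self.aborted = True

    def abort_for_queue_entry(agents, qe_id):
        # Only agents that registered the QE's ID get aborted.
        for agent in agents:
            if qe_id in agent.queue_entry_ids:
                agent.abort()

    repair_agent = Agent(tasks=['repair'])  # registers no QE IDs
    job_agent = Agent(tasks=['run'], queue_entry_ids=(42,))
    abort_for_queue_entry([repair_agent, job_agent], 42)
    assert job_agent.aborted and not repair_agent.aborted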
Signed-off-by: Steve Howard <showard@google.com>
git-svn-id: http://test.kernel.org/svn/autotest/trunk@2917 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index 11b7f2d..83df096 100644
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py
@@ -1172,8 +1172,10 @@
def on_task_failure(self):
self.queue = Queue.Queue(0)
- for task in self.active_task.failure_tasks:
- self.add_task(task)
+ # run failure tasks in a new Agent, so host_ids and queue_entry_ids will
+ # get reset.
+ new_agent = Agent(self.active_task.failure_tasks)
+ self.dispatcher.add_agent(new_agent)
def is_running(self):
@@ -1324,8 +1326,10 @@
protection = host_protections.Protection.get_attr_name(protection)
self.host = host
- self.queue_entry = queue_entry
- self._set_ids(host=host, queue_entries=[queue_entry])
+ self.queue_entry_to_fail = queue_entry
+ # *don't* include the queue entry in IDs -- if the queue entry is
+ # aborted, we want to leave the repair task running
+ self._set_ids(host=host)
self.create_temp_resultsdir('.repair')
cmd = [_autoserv_path, '-p', '-R', '-m', host.hostname,
@@ -1333,28 +1337,35 @@
'--host-protection', protection]
super(RepairTask, self).__init__(cmd, self.temp_results_dir)
- self._set_ids(host=host, queue_entries=[queue_entry])
self.set_host_log_file('repair', self.host)
def prolog(self):
logging.info("repair_task starting")
self.host.set_status('Repairing')
- if self.queue_entry:
- self.queue_entry.requeue()
+ if self.queue_entry_to_fail:
+ self.queue_entry_to_fail.requeue()
def _fail_queue_entry(self):
- assert self.queue_entry
- self.queue_entry.set_execution_subdir()
+ assert self.queue_entry_to_fail
+
+ if self.queue_entry_to_fail.meta_host:
+ return # don't fail metahost entries; they'll be reassigned
+
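+ # re-read the entry from the DB; an abort may have changed its status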
+ self.queue_entry_to_fail.update_from_database()
+ if self.queue_entry_to_fail.status != 'Queued':
+ return # entry has been aborted
+
+ self.queue_entry_to_fail.set_execution_subdir()
# copy results logs into the normal place for job results
_drone_manager.copy_results_on_drone(
self.monitor.get_process(),
source_path=self.temp_results_dir + '/',
- destination_path=self.queue_entry.execution_tag() + '/')
+ destination_path=self.queue_entry_to_fail.execution_tag() + '/')
- self._copy_and_parse_results([self.queue_entry])
- self.queue_entry.handle_host_failure()
+ self._copy_and_parse_results([self.queue_entry_to_fail])
+ self.queue_entry_to_fail.handle_host_failure()
def epilog(self):
@@ -1363,7 +1374,7 @@
self.host.set_status('Ready')
else:
self.host.set_status('Repair Failed')
- if self.queue_entry and not self.queue_entry.meta_host:
+ if self.queue_entry_to_fail:
self._fail_queue_entry()
@@ -1810,12 +1821,7 @@
self.__new_record = new_record
if row is None:
- sql = 'SELECT * FROM %s WHERE ID=%%s' % self.__table
- rows = _db.execute(sql, (id,))
- if not rows:
- raise DBError("row not found (table=%s, id=%s)"
- % (self.__table, id))
- row = rows[0]
+ row = self._fetch_row_from_db(id)
if self._initialized:
differences = self._compare_fields_in_row(row)
@@ -1832,6 +1838,15 @@
cls._instances_by_type_and_id.clear()
+ def _fetch_row_from_db(self, row_id):
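+ """Return the row with ID row_id from this object's table; raise DBError if no such row exists."""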
+ sql = 'SELECT * FROM %s WHERE ID=%%s' % self.__table
+ rows = _db.execute(sql, (row_id,))
+ if not rows:
+ raise DBError("row not found (table=%s, id=%s)"
+ % (self.__table, row_id))
+ return rows[0]
+
+
def _assert_row_length(self, row):
assert len(row) == len(self._fields), (
"table = %s, row = %s/%d, fields = %s/%d" % (
@@ -1875,6 +1890,12 @@
self._valid_fields.remove('id')
+ def update_from_database(self):
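+ """Re-fetch this object's row from the database and refresh its fields."""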
+ assert self.id is not None
+ row = self._fetch_row_from_db(self.id)
+ self._update_fields_from_row(row)
+
+
def count(self, where, table = None):
if not table:
table = self.__table