Add job maximum runtime, a new per-job timeout that counts time since the job actually started.
* added started_on field to host_queue_entries, so that we could actually compute this timeout
* added max_runtime_hrs to jobs, with default in global config, and added option to create_job() RPC
* added the usual controls to AFE and the CLI for the new job option
* added new max runtime timeout method to the scheduler's periodic cleanup (scheduler/monitor_db_cleanup.py)
* added migration to add new fields and set a safe default max runtime for existing jobs

Signed-off-by: Steve Howard <showard@google.com>


git-svn-id: http://test.kernel.org/svn/autotest/trunk@3132 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/scheduler/monitor_db_cleanup.py b/scheduler/monitor_db_cleanup.py
index 9a55da6..59313c0 100644
--- a/scheduler/monitor_db_cleanup.py
+++ b/scheduler/monitor_db_cleanup.py
@@ -52,6 +52,7 @@
             logging.info('Running periodic cleanup')
             self._abort_timed_out_jobs()
             self._abort_jobs_past_synch_start_timeout()
+            self._abort_jobs_past_max_runtime()
             self._clear_inactive_blocks()
             self._check_for_db_inconsistencies()
 
@@ -88,6 +89,24 @@
                 queue_entry.abort(None)
 
 
+    def _abort_jobs_past_max_runtime(self):
+        """
+        Abort started queue entries that have outlived their job's max runtime.
+        """
+        logging.info('Aborting all jobs that have passed maximum runtime')
+        overdue_rows = self._db.execute("""
+            SELECT hqe.id
+            FROM host_queue_entries AS hqe
+            INNER JOIN jobs ON (hqe.job_id = jobs.id)
+            WHERE NOT hqe.complete AND NOT hqe.aborted AND
+            hqe.started_on + INTERVAL jobs.max_runtime_hrs HOUR < NOW()""")
+        overdue_ids = [row[0] for row in overdue_rows]  # first column is hqe.id
+        overdue = models.HostQueueEntry.objects.filter(id__in=overdue_ids)
+        for entry in overdue.distinct():
+            logging.warning('Aborting entry %s due to max runtime', entry)
+            entry.abort(None)
+
+
     def _check_for_db_inconsistencies(self):
         logging.info('Checking for db inconsistencies')
         query = models.HostQueueEntry.objects.filter(active=True, complete=True)