Rate-limit the final parses started by the scheduler. If more than 100 or so run at a time, they will bring MySQL to its knees (for no good reason... all actions are on different jobs).
Risk: High
Visibility: Medium (things will work better on big jobs)
Signed-off-by: Jeremy Orlow <jorlow@google.com>
git-svn-id: http://test.kernel.org/svn/autotest/trunk@2089 592f7852-d20e-0410-864c-8624ca9c26a4
diff --git a/global_config.ini b/global_config.ini
index 514953c..077ba69 100644
--- a/global_config.ini
+++ b/global_config.ini
@@ -22,6 +22,7 @@
notify_email:
max_running_jobs: 1000
max_jobs_started_per_cycle: 100
+max_parse_processes: 5
tick_pause_sec: 5
clean_interval_minutes: 60
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index 0048b17..b914832 100644
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py
@@ -9,6 +9,7 @@
import optparse, signal, smtplib, socket, datetime, stat, pwd, errno
import common
from autotest_lib.client.common_lib import global_config, host_protections
+from autotest_lib.client.common_lib import utils
RESULTS_DIR = '.'
@@ -214,10 +215,11 @@
return cmd % (parse, flags, results_dir, output)
+# Parse commands that have been requested but not yet started.
+# parse_results() appends to this list; Dispatcher._run_final_parses()
+# pops from the front, so commands are started in request order.
+_parse_command_queue = []
def parse_results(results_dir, flags=""):
+    # Does nothing in testing mode. Otherwise the generated parse command
+    # is only queued here; it is started later by _run_final_parses().
if _testing_mode:
return
- os.system(generate_parse_command(results_dir, flags))
+ _parse_command_queue.append(generate_parse_command(results_dir, flags))
@@ -430,6 +432,9 @@
clean_interval = (
global_config.global_config.get_config_value(
_global_config_section, 'clean_interval_minutes', type=int))
+ max_parse_processes = (
+ global_config.global_config.get_config_value(
+ _global_config_section, 'max_parse_processes', type=int))
def __init__(self):
self._agents = []
@@ -454,9 +459,36 @@
self._find_aborting()
self._schedule_new_jobs()
self._handle_agents()
+ self._run_final_parses()
email_manager.send_queued_emails()
+    def _run_final_parses(self):
+        # Start queued final-parse commands until either the queue is empty
+        # or the number of parses believed to be running reaches
+        # self.max_parse_processes. Returns nothing.
+        #
+        # The running count is an estimate: it is the number of lines in
+        # the output of `ps -e` that contain the substring 'parse.py', so
+        # any process whose ps line contains that text is counted.
+        process_count = 0
+        try:
+            for line in utils.system_output('ps -e').splitlines():
+                if 'parse.py' in line:
+                    process_count += 1
+        except Exception:
+            # We'll try again in a bit.  This is a work-around for one time
+            # when the scheduler crashed due to an "Interrupted system call".
+            # Nothing is started on this pass; the queue is left untouched.
+            return
+
+        if process_count:
+            print "%d parses currently running" % process_count
+
+        # Commands are taken from the front of the queue (oldest first).
+        # NOTE(review): os.system() waits for the shell command to return,
+        # so this presumably relies on generate_parse_command() producing a
+        # command that backgrounds itself -- confirm. process_count is
+        # bumped locally rather than re-reading `ps` after each start.
+        while (process_count < self.max_parse_processes and
+               _parse_command_queue):
+            cmd = _parse_command_queue.pop(0)
+            print "Starting another final parse with cmd %s" % cmd
+            os.system(cmd)
+            process_count += 1
+
+        # Anything left over stays queued for a later call.
+        if _parse_command_queue:
+            print ("%d cmds still in final parse queue" %
+                   len(_parse_command_queue))
+
+
def add_agent(self, agent):
self._agents.append(agent)
agent.dispatcher = self