[autotest] Remove per-tick process restriction.
The per-tick process restriction was causing a performance problem
when a tick took a long time, and there is no good reason to keep
the per-tick constraint since a total process constraint already
exists.
TEST=Ran the scheduler. The unit tests pass.
BUG=chromium:471352
Change-Id: I2b669fb758fbcc898e1727da51bd6d4cd99cd5d2
Reviewed-on: https://chromium-review.googlesource.com/265072
Trybot-Ready: Paul Hobbs <phobbs@google.com>
Tested-by: Paul Hobbs <phobbs@google.com>
Commit-Queue: Paul Hobbs <phobbs@google.com>
Reviewed-by: Fang Deng <fdeng@chromium.org>
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index 201d2af..ef76796 100755
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py
@@ -1,6 +1,4 @@
#!/usr/bin/python
-#pylint: disable-msg=C0111
-
"""
Autotest scheduler
"""
@@ -706,7 +704,7 @@
# host has already been recovered in some way
continue
if self._host_has_scheduled_special_task(host):
- # host will have a special task scheduled on the next cycle
+ # host will have a special task scheduled on the next tick
continue
if print_message:
logging.info(print_message, host.hostname)
@@ -939,8 +937,7 @@
agent.task.abort()
- def _can_start_agent(self, agent, num_started_this_cycle,
- have_reached_limit):
+ def _can_start_agent(self, agent, have_reached_limit):
# always allow zero-process agents to run
if agent.task.num_processes == 0:
return True
@@ -954,14 +951,6 @@
agent.task.get_drone_hostnames_allowed())
if agent.task.num_processes > max_runnable_processes:
return False
- # if a single agent exceeds the per-cycle throttling, still allow it to
- # run when it's the first agent in the cycle
- if num_started_this_cycle == 0:
- return True
- # per-cycle throttling
- if (num_started_this_cycle + agent.task.num_processes >
- scheduler_config.config.max_processes_started_per_cycle):
- return False
return True
@@ -998,8 +987,8 @@
it's finish method, and set the success member of the
task based on this exit code.
"""
- num_started_this_cycle = 0
- num_finished_this_cycle = 0
+ num_started_this_tick = 0
+ num_finished_this_tick = 0
have_reached_limit = False
# iterate over copy, so we can remove agents during iteration
logging.debug('Handling %d Agents', len(self._agents))
@@ -1008,26 +997,25 @@
'queue_entry ids:%s' % (agent.host_ids,
agent.queue_entry_ids))
if not agent.started:
- if not self._can_start_agent(agent, num_started_this_cycle,
- have_reached_limit):
+ if not self._can_start_agent(agent, have_reached_limit):
have_reached_limit = True
logging.debug('Reached Limit of allowed running Agents.')
continue
- num_started_this_cycle += agent.task.num_processes
+ num_started_this_tick += agent.task.num_processes
self._log_extra_msg('Starting Agent')
agent.tick()
self._log_extra_msg('Agent tick completed.')
if agent.is_done():
- num_finished_this_cycle += agent.task.num_processes
+ num_finished_this_tick += agent.task.num_processes
self._log_extra_msg("Agent finished")
self.remove_agent(agent)
autotest_stats.Gauge('scheduler.jobs_per_tick').send(
- 'agents_started', num_started_this_cycle)
+ 'agents_started', num_started_this_tick)
autotest_stats.Gauge('scheduler.jobs_per_tick').send(
- 'agents_finished', num_finished_this_cycle)
- logging.info('%d running processes. %d added this cycle.',
+ 'agents_finished', num_finished_this_tick)
+ logging.info('%d running processes. %d added this tick.',
_drone_manager.total_running_processes(),
- num_started_this_cycle)
+ num_started_this_tick)
def _process_recurring_runs(self):
@@ -1326,11 +1314,10 @@
# When a job is added to database, its initial status is always
# Starting. In a scheduler tick, scheduler finds all jobs in Starting
# status, check if any of them can be started. If scheduler hits some
- # limit, e.g., max_hostless_jobs_per_drone,
- # max_processes_started_per_cycle, scheduler will leave these jobs in
- # Starting status. Otherwise, the jobs' status will be changed to
- # Running, and an autoserv process will be started in drone for each of
- # these jobs.
+ # limit, e.g., max_hostless_jobs_per_drone, scheduler will
+ # leave these jobs in Starting status. Otherwise, the jobs'
+ # status will be changed to Running, and an autoserv process
+ # will be started in drone for each of these jobs.
# If the entry is still in status Starting, the process has not started
# yet. Therefore, there is no need to parse and collect log. Without
# this check, exception will be raised by scheduler as execution_subdir