autotest: Remove autotest_stats from monitor_db
This is the second of a two part change. This CL removes the rest of the
autotest_stats references in monitor_db.
BUG=chromium:667171
TEST=Run jobs on moblab.
Change-Id: Ie4e588056e2bf12564ed7bc03b2d82c5d6ce8109
Reviewed-on: https://chromium-review.googlesource.com/414271
Commit-Ready: Prathmesh Prabhu <pprabhu@chromium.org>
Tested-by: Prathmesh Prabhu <pprabhu@chromium.org>
Reviewed-by: Prathmesh Prabhu <pprabhu@chromium.org>
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index 31b325f..d420458 100755
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py
@@ -25,7 +25,6 @@
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import global_config
from autotest_lib.client.common_lib import utils
-from autotest_lib.client.common_lib.cros.graphite import autotest_stats
from autotest_lib.frontend.afe import models, rpc_utils
from autotest_lib.scheduler import agent_task, drone_manager
from autotest_lib.scheduler import email_manager, gc_stats, host_scheduler
@@ -343,7 +342,6 @@
major step begins so we can try to figure out where we are using most
of the tick time.
"""
- timer = autotest_stats.Timer('scheduler.tick')
system_utils.DroneCache.refresh()
self._log_tick_msg('Calling new tick, starting garbage collection().')
self._garbage_collection()
@@ -381,11 +379,13 @@
_drone_manager.execute_actions()
self._log_tick_msg('Calling '
'email_manager.manager.send_queued_emails().')
- with timer.get_client('email_manager_send_queued_emails'):
- email_manager.manager.send_queued_emails()
+ # TODO(pprabhu) crbug.com/667171: Add back metric for % time spent in
+ # this sub-step.
+ email_manager.manager.send_queued_emails()
self._log_tick_msg('Calling django.db.reset_queries().')
- with timer.get_client('django_db_reset_queries'):
- django.db.reset_queries()
+ # TODO(pprabhu) crbug.com/667171: Add back metric for % time spent in
+ # this sub-step.
+ django.db.reset_queries()
self._tick_count += 1
metrics.Counter('chromeos/autotest/scheduler/tick').increment()
@@ -517,12 +517,13 @@
status_list = ','.join("'%s'" % status for status in statuses)
queue_entries = scheduler_models.HostQueueEntry.fetch(
where='status IN (%s)' % status_list)
- autotest_stats.Gauge('scheduler.jobs_per_tick').send(
- 'running', len(queue_entries))
agent_tasks = []
used_queue_entries = set()
+ hqe_count_by_status = {}
for entry in queue_entries:
+ hqe_count_by_status[entry.status] = (
+ hqe_count_by_status.get(entry.status, 0) + 1)
if self.get_agents_for_entry(entry):
# already being handled
continue
@@ -532,6 +533,12 @@
agent_task = self._get_agent_task_for_queue_entry(entry)
agent_tasks.append(agent_task)
used_queue_entries.update(agent_task.queue_entries)
+
+ for status, count in hqe_count_by_status.iteritems():
+ metrics.Gauge(
+ 'chromeos/autotest/scheduler/active_host_queue_entries'
+ ).set(count, fields={'status': status})
+
return agent_tasks
@@ -835,7 +842,9 @@
host_jobs.append(queue_entry)
new_jobs_need_hosts = new_jobs_need_hosts + 1
- autotest_stats.Gauge(key).send('new_hostless_jobs', new_hostless_jobs)
+ metrics.Counter(
+ 'chromeos/autotest/scheduler/scheduled_jobs_hostless'
+ ).increment_by(new_hostless_jobs)
if not host_jobs:
return
if not _inline_host_acquisition:
@@ -850,11 +859,18 @@
self._schedule_host_job(host_assignment.host, host_assignment.job)
new_jobs_with_hosts = new_jobs_with_hosts + 1
- autotest_stats.Gauge(key).send('new_jobs_with_hosts',
- new_jobs_with_hosts)
- autotest_stats.Gauge(key).send('new_jobs_without_hosts',
- new_jobs_need_hosts -
- new_jobs_with_hosts)
+ metrics.Counter(
+ 'chromeos/autotest/scheduler/scheduled_jobs_with_hosts'
+ ).increment_by(new_jobs_with_hosts)
+ # TODO(pprabhu): Decide what to do about this metric. Million dollar
+ # question: What happens to jobs that were not matched. Do they stay in
+ # the queue, and get processed right here in the next tick (then we want
+ # a guage corresponding to the number of outstanding unmatched host
+ # jobs), or are they handled somewhere else (then we need a counter
+ # corresponding to failed_to_match_with_hosts jobs).
+ #autotest_stats.Gauge(key).send('new_jobs_without_hosts',
+ # new_jobs_need_hosts -
+ # new_jobs_with_hosts)
def _schedule_running_host_queue_entries(self):
@@ -1034,13 +1050,19 @@
num_finished_this_tick += agent.task.num_processes
self._log_extra_msg("Agent finished")
self.remove_agent(agent)
- autotest_stats.Gauge('scheduler.jobs_per_tick').send(
- 'agents_started', num_started_this_tick)
- autotest_stats.Gauge('scheduler.jobs_per_tick').send(
- 'agents_finished', num_finished_this_tick)
+
+ metrics.Counter(
+ 'chromeos/autotest/scheduler/agent_processes_started'
+ ).increment_by(num_started_this_tick)
+ metrics.Counter(
+ 'chromeos/autotest/scheduler/agent_processes_finished'
+ ).increment_by(num_finished_this_tick)
+ num_agent_processes = _drone_manager.total_running_processes()
+ metrics.Gauge(
+ 'chromeos/autotest/scheduler/agent_processes'
+ ).set(num_agent_processes)
logging.info('%d running processes. %d added this tick.',
- _drone_manager.total_running_processes(),
- num_started_this_tick)
+ num_agent_processes, num_started_this_tick)
def _process_recurring_runs(self):