autotest: Remove autotest_stats from monitor_db This is the second of a two-part change. This CL removes the rest of the autotest_stats references in monitor_db. BUG=chromium:667171 TEST=Run jobs on moblab. Change-Id: Ie4e588056e2bf12564ed7bc03b2d82c5d6ce8109 Reviewed-on: https://chromium-review.googlesource.com/414271 Commit-Ready: Prathmesh Prabhu <pprabhu@chromium.org> Tested-by: Prathmesh Prabhu <pprabhu@chromium.org> Reviewed-by: Prathmesh Prabhu <pprabhu@chromium.org>

commit: ed7ece9fdbb4018f853c96d388f96143322f8f7d [log] [tgz]
author: Prathmesh Prabhu <pprabhu@chromium.org> Wed Nov 23 11:19:43 2016 -0800
committer: chrome-bot <chrome-bot@chromium.org> Tue Dec 06 15:58:10 2016 -0800
tree: 84ff1ac6af65d57fa90803ef6201ad3cf7cb9950
parent: 4a23bde8468636e3eac8e32f4bc0896d5628e2c2 [diff] [blame]
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index 31b325f..d420458 100755
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py

@@ -25,7 +25,6 @@
 from autotest_lib.client.common_lib import control_data
 from autotest_lib.client.common_lib import global_config
 from autotest_lib.client.common_lib import utils
-from autotest_lib.client.common_lib.cros.graphite import autotest_stats
 from autotest_lib.frontend.afe import models, rpc_utils
 from autotest_lib.scheduler import agent_task, drone_manager
 from autotest_lib.scheduler import email_manager, gc_stats, host_scheduler
@@ -343,7 +342,6 @@
         major step begins so we can try to figure out where we are using most
         of the tick time.
         """
-        timer = autotest_stats.Timer('scheduler.tick')
         system_utils.DroneCache.refresh()
         self._log_tick_msg('Calling new tick, starting garbage collection().')
         self._garbage_collection()
@@ -381,11 +379,13 @@
         _drone_manager.execute_actions()
         self._log_tick_msg('Calling '
                            'email_manager.manager.send_queued_emails().')
-        with timer.get_client('email_manager_send_queued_emails'):
-            email_manager.manager.send_queued_emails()
+        # TODO(pprabhu) crbug.com/667171: Add back metric for % time spent in
+        # this sub-step.
+        email_manager.manager.send_queued_emails()
         self._log_tick_msg('Calling django.db.reset_queries().')
-        with timer.get_client('django_db_reset_queries'):
-            django.db.reset_queries()
+        # TODO(pprabhu) crbug.com/667171: Add back metric for % time spent in
+        # this sub-step.
+        django.db.reset_queries()
         self._tick_count += 1
         metrics.Counter('chromeos/autotest/scheduler/tick').increment()
 
@@ -517,12 +517,13 @@
         status_list = ','.join("'%s'" % status for status in statuses)
         queue_entries = scheduler_models.HostQueueEntry.fetch(
                 where='status IN (%s)' % status_list)
-        autotest_stats.Gauge('scheduler.jobs_per_tick').send(
-                'running', len(queue_entries))
 
         agent_tasks = []
         used_queue_entries = set()
+        hqe_count_by_status = {}
         for entry in queue_entries:
+            hqe_count_by_status[entry.status] = (
+                hqe_count_by_status.get(entry.status, 0) + 1)
             if self.get_agents_for_entry(entry):
                 # already being handled
                 continue
@@ -532,6 +533,12 @@
             agent_task = self._get_agent_task_for_queue_entry(entry)
             agent_tasks.append(agent_task)
             used_queue_entries.update(agent_task.queue_entries)
+
+        for status, count in hqe_count_by_status.iteritems():
+            metrics.Gauge(
+                'chromeos/autotest/scheduler/active_host_queue_entries'
+            ).set(count, fields={'status': status})
+
         return agent_tasks
 
 
@@ -835,7 +842,9 @@
                 host_jobs.append(queue_entry)
                 new_jobs_need_hosts = new_jobs_need_hosts + 1
 
-        autotest_stats.Gauge(key).send('new_hostless_jobs', new_hostless_jobs)
+        metrics.Counter(
+            'chromeos/autotest/scheduler/scheduled_jobs_hostless'
+        ).increment_by(new_hostless_jobs)
         if not host_jobs:
             return
         if not _inline_host_acquisition:
@@ -850,11 +859,18 @@
             self._schedule_host_job(host_assignment.host, host_assignment.job)
             new_jobs_with_hosts = new_jobs_with_hosts + 1
 
-        autotest_stats.Gauge(key).send('new_jobs_with_hosts',
-                                       new_jobs_with_hosts)
-        autotest_stats.Gauge(key).send('new_jobs_without_hosts',
-                                       new_jobs_need_hosts -
-                                       new_jobs_with_hosts)
+        metrics.Counter(
+            'chromeos/autotest/scheduler/scheduled_jobs_with_hosts'
+        ).increment_by(new_jobs_with_hosts)
+        # TODO(pprabhu): Decide what to do about this metric. Million dollar
+        # question: What happens to jobs that were not matched? Do they stay in
+        # the queue and get processed right here in the next tick (then we want
+        # a gauge corresponding to the number of outstanding unmatched host
+        # jobs), or are they handled somewhere else (then we need a counter
+        # corresponding to failed_to_match_with_hosts jobs)?
+        #autotest_stats.Gauge(key).send('new_jobs_without_hosts',
+        #                               new_jobs_need_hosts -
+        #                               new_jobs_with_hosts)
 
 
     def _schedule_running_host_queue_entries(self):
@@ -1034,13 +1050,19 @@
                 num_finished_this_tick += agent.task.num_processes
                 self._log_extra_msg("Agent finished")
                 self.remove_agent(agent)
-        autotest_stats.Gauge('scheduler.jobs_per_tick').send(
-                'agents_started', num_started_this_tick)
-        autotest_stats.Gauge('scheduler.jobs_per_tick').send(
-                'agents_finished', num_finished_this_tick)
+
+        metrics.Counter(
+            'chromeos/autotest/scheduler/agent_processes_started'
+        ).increment_by(num_started_this_tick)
+        metrics.Counter(
+            'chromeos/autotest/scheduler/agent_processes_finished'
+        ).increment_by(num_finished_this_tick)
+        num_agent_processes = _drone_manager.total_running_processes()
+        metrics.Gauge(
+            'chromeos/autotest/scheduler/agent_processes'
+        ).set(num_agent_processes)
         logging.info('%d running processes. %d added this tick.',
-                     _drone_manager.total_running_processes(),
-                     num_started_this_tick)
+                     num_agent_processes, num_started_this_tick)
 
 
     def _process_recurring_runs(self):
commit	ed7ece9fdbb4018f853c96d388f96143322f8f7d	[log] [tgz]
author	Prathmesh Prabhu <pprabhu@chromium.org>	Wed Nov 23 11:19:43 2016 -0800
committer	chrome-bot <chrome-bot@chromium.org>	Tue Dec 06 15:58:10 2016 -0800
tree	84ff1ac6af65d57fa90803ef6201ad3cf7cb9950
parent	4a23bde8468636e3eac8e32f4bc0896d5628e2c2 [diff] [blame]