[autotest] Report DUT repair status to Monarch.
At the end of each special task, if the task knows whether the
target DUT was working or broken, post that information using
ts_mon.
BUG=None
TEST=run repair and verify jobs in a local instance
Change-Id: I713a8584eb66820d890e3733c8790b421720672a
Reviewed-on: https://chromium-review.googlesource.com/345972
Reviewed-by: Aviv Keshet <akeshet@chromium.org>
Tested-by: Richard Barnette <jrbarnette@chromium.org>
diff --git a/scheduler/monitor_db.py b/scheduler/monitor_db.py
index 354f44e..d520a40 100755
--- a/scheduler/monitor_db.py
+++ b/scheduler/monitor_db.py
@@ -1,4 +1,7 @@
#!/usr/bin/python
+
+#pylint: disable=C0111
+
"""
Autotest scheduler
"""
@@ -16,6 +19,7 @@
from autotest_lib.frontend import setup_django_environment
import django.db
+from chromite.lib import ts_mon_config
from autotest_lib.client.common_lib import control_data
from autotest_lib.client.common_lib import global_config
@@ -167,6 +171,8 @@
# Start the thread to report metadata.
metadata_reporter.start()
+ ts_mon_config.SetupTsMonGlobalState('autotest_scheduler')
+
try:
initialize()
dispatcher = Dispatcher()
diff --git a/scheduler/prejob_task.py b/scheduler/prejob_task.py
index a394d0c..4ca15db 100644
--- a/scheduler/prejob_task.py
+++ b/scheduler/prejob_task.py
@@ -1,4 +1,4 @@
-#pylint: disable-msg=C0111
+#pylint: disable=C0111
"""
Prejob tasks.
@@ -67,12 +67,13 @@
def epilog(self):
super(PreJobTask, self).epilog()
- if self.success:
- return
-
if self.host.protection == host_protections.Protection.DO_NOT_VERIFY:
# effectively ignore failure for these hosts
self.success = True
+
+ if self.success:
+ self.host.record_working_state(True,
+ self.task.time_finished)
return
if self.queue_entry:
@@ -396,3 +397,5 @@
self.host.set_status(models.Host.Status.REPAIR_FAILED)
if self.queue_entry:
self._fail_queue_entry()
+ self.host.record_working_state(bool(self.success),
+ self.task.time_finished)
diff --git a/scheduler/rdb_hosts.py b/scheduler/rdb_hosts.py
index e2f4f29..54f28e1 100644
--- a/scheduler/rdb_hosts.py
+++ b/scheduler/rdb_hosts.py
@@ -18,7 +18,9 @@
import logging
import time
+
from django.core import exceptions as django_exceptions
+from chromite.lib import metrics
import common
from autotest_lib.frontend.afe import rdb_model_extensions as rdb_models
@@ -174,6 +176,9 @@
to the host.
"""
+ _HOST_WORKING_METRIC = metrics.Boolean('chromeos/autotest/dut_working')
+
+
def __init__(self, **kwargs):
# This class is designed to only check for the bare minimum
@@ -240,6 +245,21 @@
self.record_state('host_history', 'status', status)
+ def record_working_state(self, working, timestamp):
+ """Report to Monarch whether we are working or broken.
+
+ Sets the `chromeos/autotest/dut_working` boolean metric, tagged
+ with this host's `hostname` and `board`. A `pool` field is added
+ only when the host belongs to exactly one pool; with zero or
+ several pools the field is omitted.
+
+ @param working Host repair status. `True` means that the DUT
+ is up and expected to pass tests. `False`
+ means the DUT has failed repair and requires
+ manual intervention.
+ @param timestamp Time that the status was recorded.
+ NOTE(review): this value is not used in the
+ method body and is not passed to the metric;
+ confirm whether that is intended.
+ """
+ fields = {'hostname': self.hostname, 'board': self.board}
+ if len(self.pools) == 1:
+ fields['pool'] = self.pools[0]
+ self._HOST_WORKING_METRIC.set(working, fields=fields)
+
+
def update_field(self, fieldname, value):
"""Proxy for updating a field on the host.