[autotest] Add stats to servo host repair
Add stats to track how often servo host repair fails/succeeds.
TEST=Ran the servo host repair flow locally and confirmed that the
stats show up in Graphite.
BUG=chromium:254429
Change-Id: I0d7e9733923ae01bacb005858d20c7c138caebad
Reviewed-on: https://chromium-review.googlesource.com/172734
Reviewed-by: Dan Shi <dshi@chromium.org>
Tested-by: Fang Deng <fdeng@chromium.org>
Commit-Queue: Fang Deng <fdeng@chromium.org>
diff --git a/server/hosts/servo_host.py b/server/hosts/servo_host.py
index 33b0c97..d15df38 100644
--- a/server/hosts/servo_host.py
+++ b/server/hosts/servo_host.py
@@ -20,6 +20,7 @@
from autotest_lib.client.common_lib.cros import retry
from autotest_lib.server.cros.servo import servo
from autotest_lib.server.hosts import ssh_host
+from autotest_lib.site_utils.graphite import stats
from autotest_lib.site_utils.rpm_control_system import rpm_client
@@ -38,6 +39,11 @@
pass
+class ServoHostRepairMethodNA(ServoHostException):
+ """Raised when a repair method is not applicable."""
+ pass
+
+
class ServoHostRepairTotalFailure(ServoHostException):
"""Raised if all attempts to repair a servo host fail."""
pass
@@ -346,11 +352,12 @@
"""Power cycle the servo host using PoE.
@raises ServoHostRepairFailure if it fails to fix the servo host.
+ @raises ServoHostRepairMethodNA if it does not support power.
"""
if not self.has_power():
- raise ServoHostRepairFailure('%s does not support power.' %
- self.hostname)
+ raise ServoHostRepairMethodNA('%s does not support power.' %
+ self.hostname)
logging.info('Attempting repair via PoE powercycle.')
failed_cycles = 0
self.power_cycle()
@@ -386,13 +393,21 @@
self._powercycle_to_repair]
errors = []
for repair_func in repair_funcs:
+ counter_prefix = 'servo_host_repair.%s.' % repair_func.__name__
try:
repair_func()
self.verify()
+ stats.Counter(counter_prefix + 'SUCCEEDED').increment()
return
+ except ServoHostRepairMethodNA as e:
+ logging.warn('Repair method NA: %s', e)
+ stats.Counter(counter_prefix + 'RepairNA').increment()
+ errors.append(str(e))
except Exception as e:
logging.warn('Failed to repair servo: %s', e)
+ stats.Counter(counter_prefix + 'FAILED').increment()
errors.append(str(e))
+ stats.Counter('servo_host_repair.Full_Repair_Failed').increment()
raise ServoHostRepairTotalFailure(
'All attempts at repairing the servo failed:\n%s' %
'\n'.join(errors))