blob: 6e9a84be03e14d372654ed68e54a16a90f796cdb [file] [log] [blame]
Aviv Kesheta43072a2018-04-16 16:29:42 -07001#!/usr/bin/env python
2# Copyright 2018 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""A simple service to monitor DUT statuses from master db/afe."""
7import collections
8import logging
9import sys
10import time
11
12import common
13from autotest_lib.server import constants
14from autotest_lib.server import frontend
15from chromite.lib import metrics
16from chromite.lib import ts_mon_config
17
18from infra_libs import ts_mon
19
20
# Immutable, hashable key describing one group of DUTs. main() uses instances
# as keys of its per-bucket counters and turns each one into the field values
# reported with the dut_count gauge.
DutCountBucket = collections.namedtuple(
    'DutCountBucket',
    ['board', 'model', 'pool', 'is_locked', 'status'],
)
28
29
def _get_bucket_for_host(host):
    """Build the counter bucket that |host| belongs to.

    The board, model and pool are taken from the host's labels via
    _get_unique_label(). A pool listed in constants.Pools.MANAGED_POOLS is
    reported with a 'managed:' prefix. A falsy host status is reported as
    '[None]'.

    Args:
        host: A Host object as returned by afe. Its labels, status and locked
            attributes are read.

    Returns:
        A DutCountBucket instance describing the bucket for this host.
    """
    labels = host.labels
    label_prefixes = constants.Labels

    board_name = _get_unique_label(labels, label_prefixes.BOARD_PREFIX)
    model_name = _get_unique_label(labels, label_prefixes.MODEL_PREFIX)
    pool_name = _get_unique_label(labels, label_prefixes.POOL_PREFIX)

    # Mark pools from the managed set so they can be told apart from others.
    is_managed = pool_name in constants.Pools.MANAGED_POOLS
    reported_pool = 'managed:' + pool_name if is_managed else pool_name

    return DutCountBucket(
        board=board_name,
        model=model_name,
        pool=reported_pool,
        is_locked=host.locked,
        status=host.status or '[None]',
    )
47
48
49def _get_unique_label(labels, prefix):
50 """Return the labels for a given prefix, with prefix stripped.
51
52 If prefixed label does not occur, return '[None]'
53 If prefixed label occurs multiply, return '[Multiple]'
54
55 _get_unique_label(['foo:1', 'foo:2', 'bar1'], 'foo:') -> '[Multiple]'
56
57 _get_unique_label(['foo:1', 'bar2', 'baz3'], 'foo:') -> '1'
58
59 _get_prefixed_labels(['bar1', 'baz1'], 'foo:') -> '[None]'
60 """
61 ls = [l[len(prefix):] for l in labels if l.startswith(prefix)]
62 if not ls:
63 return '[None]'
64 elif len(ls) == 1:
65 return ls[0]
66 else:
67 return '[Multiple]'
68
69
def main(argv):
    """Entry point for dut_mon.

    Sets up ts_mon state, then loops forever: fetch all hosts from the AFE,
    count them per DutCountBucket, publish every count to the dut_count gauge,
    increment the tick counter and sleep for two minutes.

    Args:
        argv: Command-line arguments. Accepted for the script entry point but
            not used by this function.
    """
    logging.getLogger().setLevel(logging.INFO)

    with ts_mon_config.SetupTsMonGlobalState('dut_mon', indirect=True):
        afe = frontend.AFE()
        counters = collections.defaultdict(int)

        # One field per DutCountBucket attribute, with matching names.
        field_spec = [ts_mon.StringField('board'),
                      ts_mon.StringField('model'),
                      ts_mon.StringField('pool'),
                      ts_mon.BooleanField('is_locked'),
                      ts_mon.StringField('status'),
                      ]
        dut_count = metrics.Gauge('chromeos/autotest/dut_mon/dut_count',
                                  description='The number of duts in a given '
                                              'state and bucket.',
                                  field_spec=field_spec)
        tick_count = metrics.Counter('chromeos/autotest/dut_mon/tick',
                                     description='Tick counter of dut_mon.')

        while True:
            # Note: We reset all counters to zero in each loop rather than
            # creating a new defaultdict, because we want to ensure that any
            # gauges that were previously set to a nonzero value by this
            # process get set back to zero if necessary.
            for bucket in counters:
                counters[bucket] = 0

            logging.info('Fetching all hosts.')
            hosts = afe.get_hosts()
            logging.info('Fetched %s hosts.', len(hosts))
            for host in hosts:
                counters[_get_bucket_for_host(host)] += 1

            # items() works on both Python 2 and 3; iteritems() is 2-only.
            for bucket, count in counters.items():
                logging.info('%s %s', bucket, count)
                # _asdict() is the documented way to turn a namedtuple into a
                # field-name -> value mapping; namedtuple instances do not
                # reliably expose __dict__ across Python versions.
                dut_count.set(count, fields=bucket._asdict())

            tick_count.increment()
            logging.info('Sleeping for 2 minutes.')
            time.sleep(120)
113
114
# Run the monitor only when executed as a script, not when imported.
if __name__ == '__main__':
    main(argv=sys.argv)