blob: 6e9a84be03e14d372654ed68e54a16a90f796cdb [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""A simple service to monitor DUT statuses from master db/afe."""
import collections
import logging
import sys
import time
import common
from autotest_lib.server import constants
from autotest_lib.server import frontend
from chromite.lib import metrics
from chromite.lib import ts_mon_config
from infra_libs import ts_mon
# Hashable key for one (board, model, pool, is_locked, status) combination.
# The field names are the same as the ts_mon field names declared in main(),
# and the field order is the positional order used by _get_bucket_for_host().
DutCountBucket = collections.namedtuple(
    'DutCountBucket',
    ('board', 'model', 'pool', 'is_locked', 'status'),
)
def _get_bucket_for_host(host):
    """Work out which counter bucket |host| falls into.

    The board, model and pool values are taken from the host's labels via
    _get_unique_label. A pool listed in constants.Pools.MANAGED_POOLS is
    reported with a 'managed:' prefix, and a falsy host status is reported
    as '[None]'.

    Args:
        host: A Host object as returned by afe.

    Returns:
        A DutCountBucket instance describing the bucket for this host.
    """
    host_labels = host.labels
    board, model, pool = [
        _get_unique_label(host_labels, prefix)
        for prefix in (constants.Labels.BOARD_PREFIX,
                       constants.Labels.MODEL_PREFIX,
                       constants.Labels.POOL_PREFIX)
    ]
    if pool in constants.Pools.MANAGED_POOLS:
        pool = 'managed:' + pool

    status = host.status or '[None]'
    return DutCountBucket(board=board,
                          model=model,
                          pool=pool,
                          is_locked=host.locked,
                          status=status)
def _get_unique_label(labels, prefix):
    """Return the one label value carrying |prefix|, with the prefix stripped.

    If no label starts with |prefix|, return '[None]'.
    If more than one label starts with |prefix|, return '[Multiple]'.

    _get_unique_label(['foo:1', 'foo:2', 'bar1'], 'foo:') -> '[Multiple]'
    _get_unique_label(['foo:1', 'bar2', 'baz3'], 'foo:') -> '1'
    _get_unique_label(['bar1', 'baz1'], 'foo:') -> '[None]'

    Args:
        labels: Iterable of label strings.
        prefix: Prefix string that selects the labels of interest.

    Returns:
        The stripped label value, '[None]', or '[Multiple]'.
    """
    prefix_len = len(prefix)
    matches = []
    for label in labels:
        if label.startswith(prefix):
            matches.append(label[prefix_len:])

    if len(matches) > 1:
        return '[Multiple]'
    if matches:
        return matches[0]
    return '[None]'
def main(argv):
    """Entry point for dut_mon.

    Sets up ts_mon, then loops forever: fetch all hosts from the AFE, count
    them per DutCountBucket, publish every count through the dut_count gauge,
    increment the tick counter and sleep for two minutes.

    Args:
        argv: Command-line arguments. Currently unused.

    The polling loop has no exit condition, so this function does not return
    normally.
    """
    logging.getLogger().setLevel(logging.INFO)

    with ts_mon_config.SetupTsMonGlobalState('dut_mon', indirect=True):
        afe = frontend.AFE()
        counters = collections.defaultdict(int)

        field_spec = [ts_mon.StringField('board'),
                      ts_mon.StringField('model'),
                      ts_mon.StringField('pool'),
                      ts_mon.BooleanField('is_locked'),
                      ts_mon.StringField('status'),
                      ]
        dut_count = metrics.Gauge('chromeos/autotest/dut_mon/dut_count',
                                  description='The number of duts in a given '
                                              'state and bucket.',
                                  field_spec=field_spec)
        tick_count = metrics.Counter('chromeos/autotest/dut_mon/tick',
                                     description='Tick counter of dut_mon.')

        while True:
            # Note: We reset all counters to zero in each loop rather than
            # creating a new defaultdict, because we want to ensure that any
            # gauges that were previously set to a nonzero value by this
            # process get set back to zero if necessary.
            for k in counters:
                counters[k] = 0

            logging.info('Fetching all hosts.')
            hosts = afe.get_hosts()
            logging.info('Fetched %s hosts.', len(hosts))
            for host in hosts:
                counters[_get_bucket_for_host(host)] += 1

            # items() exists on both Python 2 and 3; iteritems() is Python 2
            # only.
            for bucket, count in counters.items():
                logging.info('%s %s', bucket, count)
                # _asdict() is the documented way to turn a namedtuple into a
                # field-name -> value mapping. The __dict__ attribute used
                # before is not available on namedtuples in newer Pythons.
                dut_count.set(count, fields=bucket._asdict())

            tick_count.increment()
            logging.info('Sleeping for 2 minutes.')
            time.sleep(120)
# Start the monitoring loop only when this file is executed as a script.
if __name__ == '__main__':
    main(sys.argv)