Blame - site_utils/lab_inventory.py - platform/external/autotest

blob: ba34fe0df1ac1f06b7cf813d1d10c86a9989e1b5 [file] [log] [blame]

J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	1	#!/usr/bin/env python
				2	# Copyright 2015 The Chromium OS Authors. All rights reserved.
				3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
				5
				6	"""Create e-mail reports of the Lab's DUT inventory.
				7
				8	Gathers a list of all DUTs of interest in the Lab, segregated by
				9	board and pool, and determines whether each DUT is working or
				10	broken. Then, send one or more e-mail reports summarizing the
				11	status to e-mail addresses provided on the command line.
				12
				13	usage: lab_inventory.py [ options ] [ board ... ]
				14
				15	Options:
				16	--duration / -d <hours>
				17	How far back in time to search job history to determine DUT
				18	status.
				19
				20	--board-notify <address>[,<address>]
				21	Send the "board status" e-mail to all the specified e-mail
				22	addresses.
				23
				24	--pool-notify <address>[,<address>]
				25	Send the "pool status" e-mail to all the specified e-mail
				26	addresses.
				27
J. Richard Barnette	1df6a56	2015-06-09 10:06:17 -0700	[diff] [blame]	28	--recommend <number>
				29	When generating the "board status" e-mail, include a list of
				30	<number> specific DUTs to be recommended for repair.
				31
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	32	--logdir <directory>
				33	Log progress and actions in a file under this directory. Text
				34	of any e-mail sent will also be logged in a timestamped file in
				35	this directory.
				36
J. Richard Barnette	02e8243	2015-10-13 16:02:47 -0700	[diff] [blame]	37	--debug
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	38	Suppress all logging and sending e-mail. Instead, write the
				39	output that would be generated onto stdout.
				40
				41	<board> arguments:
				42	With no arguments, gathers the status for all boards in the lab.
				43	With one or more named boards on the command line, restricts
				44	reporting to just those boards.
				45
				46	"""
				47
				48
				49	import argparse
				50	import logging
				51	import logging.handlers
				52	import os
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	53	import re
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	54	import sys
				55	import time
				56
				57	import common
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	58	from autotest_lib.client.bin import utils
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	59	from autotest_lib.client.common_lib import time_utils
J. Richard Barnette	a7c514e	2015-09-15 11:13:23 -0700	[diff] [blame]	60	from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	61	from autotest_lib.server.hosts import servo_host
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	62	from autotest_lib.site_utils import gmail_lib
				63	from autotest_lib.site_utils import status_history
				64	from autotest_lib.site_utils.suite_scheduler import constants
				65
				66
				67	# The pools in the Lab that are actually of interest.
				68	#
				69	# These are general purpose pools of DUTs that are considered
				70	# identical for purposes of testing. That is, a device in one of
				71	# these pools can be shifted to another pool at will for purposes
				72	# of supplying test demand.
				73	#
				74	# Devices in these pools are not allowed to have special-purpose
				75	# attachments, or to be part of any kind of custom fixture.
				76	# Devices in these pools are also required to reside in areas
				77	# managed by the Platforms team (i.e. at the time of this writing,
				78	# only in "Atlantis" or "Destiny").
				79	#
				80	# _CRITICAL_POOLS - Pools that must be kept fully supplied in order
				81	# to guarantee timely completion of tests from builders.
				82	# _SPARE_POOL - A low priority pool that is allowed to provide
				83	# spares to replace broken devices in the critical pools.
				84	# _MANAGED_POOLS - The set of all the general purpose pools
				85	# monitored by this script.
				86
_SPARE_POOL = 'suites'
_CRITICAL_POOLS = ['bvt', 'cq', 'continuous']
# Every pool monitored by this script: the critical pools followed
# by the spare pool.
_MANAGED_POOLS = list(_CRITICAL_POOLS) + [_SPARE_POOL]
				90
J. Richard Barnette	b8bc570c	2016-03-17 17:03:57 -0700	[diff] [blame]	91	# _EXCLUDED_LABELS - A set of labels that disqualify a DUT from
				92	# monitoring by this script. Currently, we're excluding any
				93	# 'adb' host, because we're not ready to monitor Android or
				94	# Brillo hosts.
				95	_EXCLUDED_LABELS = set(['adb'])
				96
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	97	# _DEFAULT_DURATION:
				98	# Default value used for the --duration command line option.
				99	# Specifies how far back in time to search in order to determine
				100	# DUT status.
				101
				102	_DEFAULT_DURATION = 24
				103
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	104	# _LOGDIR:
				105	# Relative path used in the calculation of the default setting
				106	# for the --logdir option. The full path is relative to
				107	# the root of the autotest directory, as determined from
				108	# sys.argv[0].
				109	# _LOGFILE:
				110	# Basename of a file to which general log information will be
				111	# written.
				112	# _LOG_FORMAT:
				113	# Format string for log messages.
				114
				115	_LOGDIR = os.path.join('logs', 'dut-data')
				116	_LOGFILE = 'lab-inventory.log'
				117	_LOG_FORMAT = '%(asctime)s \| %(levelname)-10s \| %(message)s'
				118
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	119	# Pattern describing location-based host names in the Chrome OS test
				120	# labs. Each DUT hostname designates the DUT's location:
				121	# * A lab (room) that's physically separated from other labs
				122	# (i.e. there's a door).
				123	# * A row (or aisle) of DUTs within the lab.
				124	# * A vertical rack of shelves on the row.
				125	# * A specific host on one shelf of the rack.
				126
				127	_HOSTNAME_PATTERN = re.compile(
				128	r'(chromeos\d+)-row(\d+)-rack(\d+)-host(\d+)')
				129
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	130
				131	class _PoolCounts(object):
				132	"""Maintains a set of `HostJobHistory` objects for a pool.
				133
				134	The collected history objects are nominally all part of a single
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	135	scheduling pool of DUTs. The collection maintains a list of
				136	working DUTs, a list of broken DUTs, and a list of all DUTs.
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	137
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	138	Performance note: Certain methods in this class are potentially
				139	expensive:
				140	* `get_working()`
				141	* `get_working_list()`
				142	* `get_broken()`
				143	* `get_broken_list()`
				144	The first time any one of these methods is called, it causes
				145	multiple RPC calls with a relatively expensive set of database
				146	queries. However, the results of the queries are cached in the
				147	individual `HostJobHistory` objects, so only the first call
				148	actually pays the full cost.
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	149
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	150	Additionally, `get_working_list()` and `get_broken_list()` both
				151	cache their return values to avoid recalculating lists at every
				152	call; this caching is separate from the caching of RPC results
				153	described above.
				154
				155	This class is deliberately constructed to delay the RPC cost
				156	until the accessor methods are called (rather than to query in
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	157	`record_host()`) so that it's possible to construct a complete
				158	`_LabInventory` without making the expensive queries at creation
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	159	time. `_populate_board_counts()`, below, assumes this behavior.
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	160
				161	"""
				162
				163	def __init__(self):
				164	self._histories = []
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	165	self._working_list = None
				166	self._broken_list = None
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	167
				168
				169	def record_host(self, host_history):
				170	"""Add one `HostJobHistory` object to the collection.
				171
				172	@param host_history The `HostJobHistory` object to be
				173	remembered.
				174
				175	"""
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	176	self._working_list = None
				177	self._broken_list = None
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	178	self._histories.append(host_history)
				179
				180
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	181	def get_working_list(self):
				182	"""Return a list of all working DUTs in the pool.
				183
				184	Filter `self._histories` for histories where the last
				185	diagnosis is `WORKING`.
				186
				187	Cache the result so that we only cacluate it once.
				188
				189	@return A list of HostJobHistory objects.
				190
				191	"""
				192	if self._working_list is None:
				193	self._working_list = [h for h in self._histories
				194	if h.last_diagnosis()[0] == status_history.WORKING]
				195	return self._working_list
				196
				197
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	198	def get_working(self):
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	199	"""Return the number of working DUTs in the pool."""
				200	return len(self.get_working_list())
				201
				202
				203	def get_broken_list(self):
				204	"""Return a list of all broken DUTs in the pool.
				205
				206	Filter `self._histories` for histories where the last
				207	diagnosis is not `WORKING`.
				208
				209	Cache the result so that we only cacluate it once.
				210
				211	@return A list of HostJobHistory objects.
				212
				213	"""
				214	if self._broken_list is None:
				215	self._broken_list = [h for h in self._histories
				216	if h.last_diagnosis()[0] != status_history.WORKING]
				217	return self._broken_list
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	218
				219
				220	def get_broken(self):
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	221	"""Return the number of broken DUTs in the pool."""
				222	return len(self.get_broken_list())
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	223
				224
				225	def get_total(self):
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	226	"""Return the total number of DUTs in the pool."""
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	227	return len(self._histories)
				228
				229
				230	class _BoardCounts(object):
				231	"""Maintains a set of `HostJobHistory` objects for a board.
				232
				233	The collected history objects are nominally all of the same
				234	board. The collection maintains a count of working DUTs, a
				235	count of broken DUTs, and a total count. The counts can be
				236	obtained either for a single pool, or as a total across all
				237	pools.
				238
				239	DUTs in the collection must be assigned to one of the pools
				240	in `_MANAGED_POOLS`.
				241
				242	The `get_working()` and `get_broken()` methods rely on the
				243	methods of the same name in _PoolCounts, so the performance
				244	note in _PoolCounts applies here as well.
				245
				246	"""
				247
				248	def __init__(self):
				249	self._pools = {
				250	pool: _PoolCounts() for pool in _MANAGED_POOLS
				251	}
				252
				253	def record_host(self, host_history):
				254	"""Add one `HostJobHistory` object to the collection.
				255
				256	@param host_history The `HostJobHistory` object to be
				257	remembered.
				258
				259	"""
J. Richard Barnette	3d0590a	2015-04-29 12:56:12 -0700	[diff] [blame]	260	pool = host_history.host_pool
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	261	self._pools[pool].record_host(host_history)
				262
				263
				264	def _count_pool(self, get_pool_count, pool=None):
				265	"""Internal helper to count hosts in a given pool.
				266
				267	The `get_pool_count` parameter is a function to calculate
				268	the exact count of interest for the pool.
				269
				270	@param get_pool_count Function to return a count from a
				271	_PoolCount object.
				272	@param pool The pool to be counted. If `None`,
				273	return the total across all pools.
				274
				275	"""
				276	if pool is None:
				277	return sum([get_pool_count(counts)
				278	for counts in self._pools.values()])
				279	else:
				280	return get_pool_count(self._pools[pool])
				281
				282
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	283	def get_working_list(self):
				284	"""Return a list of all working DUTs for the board.
				285
				286	Go through all HostJobHistory objects in the board's pools,
				287	selecting the ones where the last diagnosis is `WORKING`.
				288
				289	@return A list of HostJobHistory objects.
				290
				291	"""
				292	l = []
				293	for p in self._pools.values():
				294	l.extend(p.get_working_list())
				295	return l
				296
				297
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	298	def get_working(self, pool=None):
				299	"""Return the number of working DUTs in a pool.
				300
				301	@param pool The pool to be counted. If `None`, return the
				302	total across all pools.
				303
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	304	@return The total number of working DUTs in the selected
				305	pool(s).
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	306	"""
				307	return self._count_pool(_PoolCounts.get_working, pool)
				308
				309
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	310	def get_broken_list(self):
				311	"""Return a list of all broken DUTs for the board.
				312
				313	Go through all HostJobHistory objects in the board's pools,
				314	selecting the ones where the last diagnosis is not
				315	`WORKING`.
				316
				317	@return A list of HostJobHistory objects.
				318
				319	"""
				320	l = []
				321	for p in self._pools.values():
				322	l.extend(p.get_broken_list())
				323	return l
				324
				325
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	326	def get_broken(self, pool=None):
				327	"""Return the number of broken DUTs in a pool.
				328
				329	@param pool The pool to be counted. If `None`, return the
				330	total across all pools.
				331
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	332	@return The total number of broken DUTs in the selected pool(s).
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	333	"""
				334	return self._count_pool(_PoolCounts.get_broken, pool)
				335
				336
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	337	def get_spares_buffer(self):
				338	"""Return the the nominal number of working spares.
				339
				340	Calculates and returns how many working spares there would
				341	be in the spares pool if all broken DUTs were in the spares
				342	pool. This number may be negative, indicating a shortfall
				343	in the critical pools.
				344
				345	@return The total number DUTs in the spares pool, less the total
				346	number of broken DUTs in all pools.
				347	"""
				348	return self.get_total(_SPARE_POOL) - self.get_broken()
				349
				350
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	351	def get_total(self, pool=None):
				352	"""Return the total number of DUTs in a pool.
				353
				354	@param pool The pool to be counted. If `None`, return the
				355	total across all pools.
				356
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	357	@return The total number of DUTs in the selected pool(s).
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	358	"""
				359	return self._count_pool(_PoolCounts.get_total, pool)
				360
				361
				362	class _LabInventory(dict):
				363	"""Collection of `HostJobHistory` objects for the Lab's inventory.
				364
				365	The collection is indexed by board. Indexing returns the
				366	_BoardCounts object associated with the board.
				367
				368	The collection is also iterable. The iterator returns all the
				369	boards in the inventory, in unspecified order.
				370
				371	"""
				372
J. Richard Barnette	b8bc570c	2016-03-17 17:03:57 -0700	[diff] [blame]	373	@staticmethod
				374	def _eligible_host(afehost):
				375	"""Return whether this host is eligible for monitoring.
				376
				377	Hosts with any label that's in `_EXCLUDED_LABELS` aren't
				378	eligible.
				379
				380	@param afehost The host to be tested for eligibility.
				381	"""
				382	return not len(_EXCLUDED_LABELS.intersection(afehost.labels))
				383
				384
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	385	@classmethod
				386	def create_inventory(cls, afe, start_time, end_time, boardlist=[]):
				387	"""Return a Lab inventory with specified parameters.
				388
				389	By default, gathers inventory from `HostJobHistory` objects
				390	for all DUTs in the `_MANAGED_POOLS` list. If `boardlist`
				391	is supplied, the inventory will be restricted to only the
				392	given boards.
				393
				394	@param afe AFE object for constructing the
				395	`HostJobHistory` objects.
				396	@param start_time Start time for the `HostJobHistory`
				397	objects.
				398	@param end_time End time for the `HostJobHistory`
				399	objects.
				400	@param boardlist List of boards to include. If empty,
				401	include all available boards.
				402	@return A `_LabInventory` object for the specified boards.
				403
				404	"""
				405	label_list = [constants.Labels.POOL_PREFIX + l
				406	for l in _MANAGED_POOLS]
				407	afehosts = afe.get_hosts(labels__name__in=label_list)
				408	if boardlist:
J. Richard Barnette	b8bc570c	2016-03-17 17:03:57 -0700	[diff] [blame]	409	# We're deliberately not checking host eligibility in this
				410	# code path. This is a debug path, not used in production;
				411	# it may be useful to include ineligible hosts here.
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	412	boardhosts = []
				413	for board in boardlist:
				414	board_label = constants.Labels.BOARD_PREFIX + board
				415	host_list = [h for h in afehosts
				416	if board_label in h.labels]
				417	boardhosts.extend(host_list)
				418	afehosts = boardhosts
J. Richard Barnette	b8bc570c	2016-03-17 17:03:57 -0700	[diff] [blame]	419	else:
				420	afehosts = [h for h in afehosts if cls._eligible_host(h)]
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	421	create = lambda host: (
				422	status_history.HostJobHistory(afe, host,
				423	start_time, end_time))
				424	return cls([create(host) for host in afehosts])
				425
				426
				427	def __init__(self, histories):
J. Richard Barnette	6948ed3	2015-05-06 08:57:10 -0700	[diff] [blame]	428	# N.B. The query that finds our hosts is restricted to those
				429	# with a valid pool: label, but doesn't check for a valid
				430	# board: label. In some (insufficiently) rare cases, the
				431	# AFE hosts table has been known to (incorrectly) have DUTs
				432	# with a pool: but no board: label. We explicitly exclude
				433	# those here.
				434	histories = [h for h in histories
				435	if h.host_board is not None]
J. Richard Barnette	3d0590a	2015-04-29 12:56:12 -0700	[diff] [blame]	436	boards = set([h.host_board for h in histories])
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	437	initval = { board: _BoardCounts() for board in boards }
				438	super(_LabInventory, self).__init__(initval)
				439	self._dut_count = len(histories)
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	440	self._managed_boards = None
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	441	for h in histories:
J. Richard Barnette	3d0590a	2015-04-29 12:56:12 -0700	[diff] [blame]	442	self[h.host_board].record_host(h)
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	443
				444
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	445	def get_managed_boards(self):
				446	"""Return the set of "managed" boards.
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	447
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	448	Operationally, saying a board is "managed" means that the
				449	board will be included in the "board" and "repair
				450	recommendations" reports. That is, if there are failures in
				451	the board's inventory then lab techs will be asked to fix
				452	them without a separate ticket.
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	453
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	454	For purposes of implementation, a board is "managed" if it
				455	has DUTs in both the spare and a non-spare (i.e. critical)
				456	pool.
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	457
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	458	@return A set of all the boards that have both spare and
				459	non-spare pools.
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	460	"""
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	461	if self._managed_boards is None:
				462	self._managed_boards = set()
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	463	for board, counts in self.items():
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	464	spares = counts.get_total(_SPARE_POOL)
				465	total = counts.get_total()
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	466	if spares != 0 and spares != total:
				467	self._managed_boards.add(board)
				468	return self._managed_boards
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	469
				470
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	471	def get_num_duts(self):
				472	"""Return the total number of DUTs in the inventory."""
				473	return self._dut_count
				474
				475
				476	def get_num_boards(self):
				477	"""Return the total number of boards in the inventory."""
				478	return len(self)
				479
				480
def _sort_by_location(inventory_list):
    """Return a list of DUTs, organized by location.

    Take the given list of `HostJobHistory` objects, separate it
    into a list per lab, and sort each lab's list by location.  The
    order of sorting within a lab is
      * By row number within the lab,
      * then by rack number within the row,
      * then by host shelf number within the rack.

    Return a list of the sorted lists.

    DUTs whose hostname doesn't match `_HOSTNAME_PATTERN` have no
    known location, and are left out of the result.

    Implementation note: locations are compared as
    `(row, rack, host)` tuples of integers, so there's no limit on
    the size of the individual numbers.

    @param inventory_list  List of `HostJobHistory` objects to sort.
    @return A list of sorted lists of DUTs.

    """
    lab_lists = {}
    for history in inventory_list:
        location = _HOSTNAME_PATTERN.match(history.host.hostname)
        if location is None:
            continue
        lab = location.group(1)
        key = tuple(int(idx) for idx in location.group(2, 3, 4))
        lab_lists.setdefault(lab, []).append((key, history))
    return_list = []
    for dut_list in lab_lists.values():
        # Sort on the location only; the sort is stable, so DUTs at
        # the same location keep their input order.
        dut_list.sort(key=lambda t: t[0])
        return_list.append([t[1] for t in dut_list])
    return return_list
				516
				517
				518	def _score_repair_set(buffer_counts, repair_list):
				519	"""Return a numeric score rating a set of DUTs to be repaired.
				520
				521	`buffer_counts` is a dictionary mapping board names to the
				522	size of the board's spares buffer.
				523
				524	`repair_list` is a list of DUTs to be repaired.
				525
				526	This function calculates the new set of buffer counts that would
				527	result from the proposed repairs, and scores the new set using
				528	two numbers:
				529	* Worst case buffer count for any board (higher is better).
				530	This is the more siginficant number for comparison.
				531	* Number of boards at the worst case (lower is better). This
				532	is the less significant number.
				533
				534	Implementation note: The score could fail to reflect the
				535	intended criteria if there are more than 1000 boards in the
				536	inventory.
				537
				538	@param spare_counts A dictionary mapping boards to buffer counts.
				539	@param repair_list A list of boards to be repaired.
				540	@return A numeric score.
				541
				542	"""
				543	# Go through `buffer_counts`, and create a list of new counts
				544	# that records the buffer count for each board after repair.
				545	# The new list of counts discards the board names, as they don't
				546	# contribute to the final score.
				547	_NBOARDS = 1000
				548	repair_inventory = _LabInventory(repair_list)
				549	new_counts = []
				550	for b, c in buffer_counts.items():
				551	if b in repair_inventory:
				552	newcount = repair_inventory[b].get_total()
				553	else:
				554	newcount = 0
				555	new_counts.append(c + newcount)
				556	# Go through the new list of counts. Find the worst available
				557	# spares count, and count how many times that worst case occurs.
				558	worst_count = new_counts[0]
				559	num_worst = 1
				560	for c in new_counts[1:]:
				561	if c == worst_count:
				562	num_worst += 1
				563	elif c < worst_count:
				564	worst_count = c
				565	num_worst = 1
				566	# Return the calculated score
				567	return _NBOARDS * worst_count - num_worst
				568
				569
				570	def _generate_repair_recommendation(inventory, num_recommend):
				571	"""Return a summary of selected DUTs needing repair.
				572
				573	Returns a message recommending a list of broken DUTs to be
				574	repaired. The list of DUTs is selected based on these
				575	criteria:
				576	* No more than `num_recommend` DUTs will be listed.
				577	* All DUTs must be in the same lab.
				578	* DUTs should be selected for some degree of physical
				579	proximity.
				580	* DUTs for boards with a low spares buffer are more important
				581	than DUTs with larger buffers.
				582
				583	The algorithm used will guarantee that at least one DUT from a
				584	board with the smallest spares buffer will be recommended. If
				585	the worst spares buffer number is shared by more than one board,
				586	the algorithm will tend to prefer repair sets that include more
				587	of those boards over sets that cover fewer boards.
				588
J. Richard Barnette	1df6a56	2015-06-09 10:06:17 -0700	[diff] [blame]	589	@param inventory Inventory for generating recommendations.
				590	@param num_recommend Number of DUTs to recommend for repair.
				591
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	592	"""
				593	logging.debug('Creating DUT repair recommendations')
J. Richard Barnette	e39c827	2015-10-20 17:58:30 -0700	[diff] [blame]	594	board_buffer_counts = {}
				595	broken_list = []
				596	for board in inventory.get_managed_boards():
				597	logging.debug('Listing failed DUTs for %s', board)
				598	counts = inventory[board]
				599	if counts.get_broken() != 0:
				600	board_buffer_counts[board] = counts.get_spares_buffer()
				601	broken_list.extend(counts.get_broken_list())
J. Richard Barnette	5512743	2015-10-13 17:01:56 -0700	[diff] [blame]	602	# N.B. The logic inside this loop may seem complicated, but
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	603	# simplification is hard:
				604	# * Calculating an initial recommendation outside of
				605	# the loop likely would make things more complicated,
				606	# not less.
				607	# * It's necessary to calculate an initial lab slice once per
				608	# lab _before_ the while loop, in case the number of broken
				609	# DUTs in a lab is less than `num_recommend`.
J. Richard Barnette	5512743	2015-10-13 17:01:56 -0700	[diff] [blame]	610	recommendation = None
				611	best_score = None
				612	for lab_duts in _sort_by_location(broken_list):
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	613	start = 0
				614	end = num_recommend
				615	lab_slice = lab_duts[start : end]
				616	lab_score = _score_repair_set(board_buffer_counts,
				617	lab_slice)
				618	while end < len(lab_duts):
				619	start += 1
				620	end += 1
				621	new_slice = lab_duts[start : end]
				622	new_score = _score_repair_set(board_buffer_counts,
				623	new_slice)
				624	if new_score > lab_score:
				625	lab_slice = new_slice
				626	lab_score = new_score
				627	if recommendation is None or lab_score > best_score:
				628	recommendation = lab_slice
				629	best_score = lab_score
J. Richard Barnette	1df6a56	2015-06-09 10:06:17 -0700	[diff] [blame]	630	message = ['Repair recommendations:\n',
				631	'%-30s %-16s %s' % (
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	632	'Hostname', 'Board', 'Servo instructions')]
				633	for h in recommendation:
				634	servo_name = servo_host.make_servo_hostname(h.host.hostname)
				635	if utils.host_is_in_lab_zone(servo_name):
				636	servo_message = 'Repair servo first'
				637	else:
				638	servo_message = 'No servo present'
				639	line = '%-30s %-16s %s' % (
				640	h.host.hostname, h.host_board, servo_message)
				641	message.append(line)
				642	return '\n'.join(message)
				643
				644
def _generate_board_inventory_message(inventory):
    """Generate the "board inventory" e-mail message.

    The board inventory is a list by board summarizing the number
    of working and broken DUTs, and the total shortfall or surplus
    of working devices relative to the minimum critical pool
    requirement.

    The report omits boards with no DUTs in the spare pool or with
    no DUTs in a critical pool.  The per-board table lists only
    boards with at least one broken DUT; the summary totals cover
    all managed boards.

    N.B. For sample output text formatted as users can expect to
    see it in e-mail and log files, refer to the unit tests.

    @param inventory  _LabInventory object with the inventory to
                      be reported on.
    @return String with the inventory message to be sent.

    """
    logging.debug('Creating board inventory')
    nworking = 0
    nbroken = 0
    nbroken_boards = 0
    ntotal_boards = 0
    summaries = []
    for board in inventory.get_managed_boards():
        logging.debug('Counting board inventory for %s', board)
        counts = inventory[board]
        # Summary elements laid out in the same order as the text
        # headers:
        #     Board Avail   Bad  Good Spare Total
        #      e[0]  e[1]  e[2]  e[3]  e[4]  e[5]
        element = (board,
                   counts.get_spares_buffer(),
                   counts.get_broken(),
                   counts.get_working(),
                   counts.get_total(_SPARE_POOL),
                   counts.get_total())
        if element[2]:
            summaries.append(element)
            nbroken_boards += 1
        ntotal_boards += 1
        nbroken += element[2]
        nworking += element[3]
    ntotal = nworking + nbroken
    # Smallest spares buffer first; ties go to the board with more
    # broken DUTs.
    summaries = sorted(summaries, key=lambda e: (e[1], -e[2]))
    if ntotal:
        broken_percent = int(round(100.0 * nbroken / ntotal))
        working_percent = 100 - broken_percent
    else:
        # No DUTs at all; report 0% rather than dividing by zero.
        broken_percent = 0
        working_percent = 0
    message = ['Summary of DUTs in inventory:',
               '%10s %10s %6s' % ('Bad', 'Good', 'Total'),
               '%5d %3d%% %5d %3d%% %6d' % (
                   nbroken, broken_percent,
                   nworking, working_percent,
                   ntotal),
               '',
               'Boards with failures: %d' % nbroken_boards,
               'Boards in inventory: %d' % ntotal_boards,
               '', '',
               'Full board inventory:\n',
               '%-22s %5s %5s %5s %5s %5s' % (
                   'Board', 'Avail', 'Bad', 'Good',
                   'Spare', 'Total')]
    message.extend(
            ['%-22s %5d %5d %5d %5d %5d' % e for e in summaries])
    return '\n'.join(message)
				710
				711
J. Richard Barnette	4845fcf	2015-04-20 14:26:25 -0700	[diff] [blame]	712	_POOL_INVENTORY_HEADER = '''\
Aviv Keshet	056d74c	2015-07-14 09:18:43 -0700	[diff] [blame]	713	Notice to Infrastructure deputies: All boards shown below are at
J. Richard Barnette	c9a143c	2015-06-04 11:11:19 -0700	[diff] [blame]	714	less than full strength, please take action to resolve the issues.
				715	Once you're satisified that failures won't recur, failed DUTs can
				716	be replaced with spares by running `balance_pool`. Detailed
				717	instructions can be found here:
J. Richard Barnette	4845fcf	2015-04-20 14:26:25 -0700	[diff] [blame]	718	http://go/cros-manage-duts
				719	'''
				720
				721
def _generate_pool_inventory_message(inventory):
    """Generate the "pool inventory" e-mail message.

    The message starts with `_POOL_INVENTORY_HEADER`, followed by
    one section for each pool in `_CRITICAL_POOLS`.  A section
    lists, for every board with at least one broken DUT in that
    pool, the number of broken, working and total DUTs, with the
    boards having the most broken DUTs first.  A pool with no
    broken DUTs gets a one-line "full strength" note instead.

    N.B. For sample output text formatted as users can expect to
    see it in e-mail and log files, refer to the unit tests.

    @param inventory  _LabInventory object with the inventory to
                      be reported on.
    @return String with the inventory message to be sent.

    """
    logging.debug('Creating pool inventory')
    lines = [_POOL_INVENTORY_HEADER]
    for index, pool in enumerate(_CRITICAL_POOLS):
        # Every section after the first is set off by a blank line.
        separator = '\n' if index else ''
        lines.append(
                '%sStatus for pool:%s, by board:' % (separator, pool))
        lines.append(
                '%-20s %5s %5s %5s' % (
                    'Board', 'Bad', 'Good', 'Total'))
        rows = []
        for board, counts in inventory.items():
            logging.debug('Counting inventory for %s, %s',
                          board, pool)
            broken = counts.get_broken(pool)
            if broken != 0:
                rows.append((board,
                             broken,
                             counts.get_working(pool),
                             counts.get_total(pool)))
        if not rows:
            lines.append('(All boards at full strength)')
            continue
        rows.sort(key=lambda row: -row[1])
        for row in rows:
            lines.append('%-20s %5d %5d %5d' % row)
    return '\n'.join(lines)
				764
				765
				766	def _send_email(arguments, tag, subject, recipients, body):
				767	"""Send an inventory e-mail message.
				768
				769	The message is logged in the selected log directory using `tag`
				770	for the file name.
				771
				772	If the --print option was requested, the message is neither
				773	logged nor sent, but merely printed on stdout.
				774
				775	@param arguments Parsed command-line options.
				776	@param tag Tag identifying the inventory for logging
				777	purposes.
				778	@param subject E-mail Subject: header line.
				779	@param recipients E-mail addresses for the To: header line.
				780	@param body E-mail message body.
				781
				782	"""
				783	logging.debug('Generating email: "%s"', subject)
				784	all_recipients = ', '.join(recipients)
				785	report_body = '\n'.join([
				786	'To: %s' % all_recipients,
				787	'Subject: %s' % subject,
				788	'', body, ''])
J. Richard Barnette	02e8243	2015-10-13 16:02:47 -0700	[diff] [blame]	789	if arguments.debug:
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	790	print report_body
				791	else:
				792	filename = os.path.join(arguments.logdir, tag)
				793	try:
				794	report_file = open(filename, 'w')
				795	report_file.write(report_body)
				796	report_file.close()
				797	except EnvironmentError as e:
				798	logging.error('Failed to write %s: %s', filename, e)
				799	try:
				800	gmail_lib.send_email(all_recipients, subject, body)
				801	except Exception as e:
				802	logging.error('Failed to send e-mail to %s: %s',
				803	all_recipients, e)
				804
				805
				806	def _separate_email_addresses(address_list):
				807	"""Parse a list of comma-separated lists of e-mail addresses.
				808
				809	@param address_list A list of strings containing comma
				810	separate e-mail addresses.
				811	@return A list of the individual e-mail addresses.
				812
				813	"""
				814	newlist = []
				815	for arg in address_list:
				816	newlist.extend([email.strip() for email in arg.split(',')])
				817	return newlist
				818
				819
				820	def _verify_arguments(arguments):
				821	"""Validate command-line arguments.
				822
				823	Join comma separated e-mail addresses for `--board-notify` and
				824	`--pool-notify` in separate option arguments into a single list.
				825
J. Richard Barnette	02e8243	2015-10-13 16:02:47 -0700	[diff] [blame]	826	For non-debug uses, require that notification be requested for
				827	at least one report. For debug, if notification isn't specified,
				828	treat it as "run all the reports."
				829
				830	The return value indicates success or failure; in the case of
				831	failure, we also write an error message to stderr.
				832
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	833	@param arguments Command-line arguments as returned by
				834	`ArgumentParser`
J. Richard Barnette	02e8243	2015-10-13 16:02:47 -0700	[diff] [blame]	835	@return True if the arguments are semantically good, or False
				836	if the arguments don't meet requirements.
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	837
				838	"""
				839	arguments.board_notify = _separate_email_addresses(
				840	arguments.board_notify)
				841	arguments.pool_notify = _separate_email_addresses(
				842	arguments.pool_notify)
J. Richard Barnette	02e8243	2015-10-13 16:02:47 -0700	[diff] [blame]	843	if not arguments.board_notify and not arguments.pool_notify:
				844	if not arguments.debug:
				845	sys.stderr.write('Must specify at least one of '
				846	'--board-notify or --pool-notify\n')
				847	return False
				848	else:
				849	# We want to run all the reports. An empty notify list
				850	# will cause a report to be skipped, so make sure the
				851	# lists are non-empty.
				852	arguments.board_notify = ['']
				853	arguments.pool_notify = ['']
				854	return True
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	855
				856
				857	def _get_logdir(script):
				858	"""Get the default directory for the `--logdir` option.
				859
				860	The default log directory is based on the parent directory
				861	containing this script.
				862
				863	@param script Path to this script file.
				864	@return A path to a directory.
				865
				866	"""
				867	basedir = os.path.dirname(os.path.abspath(script))
				868	basedir = os.path.dirname(basedir)
				869	return os.path.join(basedir, _LOGDIR)
				870
				871
				872	def _parse_command(argv):
				873	"""Parse the command line arguments.
				874
				875	Create an argument parser for this command's syntax, parse the
				876	command line, and return the result of the ArgumentParser
				877	parse_args() method.
				878
				879	@param argv Standard command line argument vector; argv[0] is
				880	assumed to be the command name.
				881	@return Result returned by ArgumentParser.parse_args().
				882
				883	"""
				884	parser = argparse.ArgumentParser(
				885	prog=argv[0],
				886	description='Gather and report lab inventory statistics')
				887	parser.add_argument('-d', '--duration', type=int,
				888	default=_DEFAULT_DURATION, metavar='HOURS',
				889	help='number of hours back to search for status'
				890	' (default: %d)' % _DEFAULT_DURATION)
				891	parser.add_argument('--board-notify', action='append',
				892	default=[], metavar='ADDRESS',
				893	help='Generate board inventory message, '
				894	'and send it to the given e-mail address(es)')
				895	parser.add_argument('--pool-notify', action='append',
				896	default=[], metavar='ADDRESS',
				897	help='Generate pool inventory message, '
				898	'and send it to the given address(es)')
J. Richard Barnette	1df6a56	2015-06-09 10:06:17 -0700	[diff] [blame]	899	parser.add_argument('-r', '--recommend', type=int, default=None,
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	900	help=('Specify how many DUTs should be '
J. Richard Barnette	1df6a56	2015-06-09 10:06:17 -0700	[diff] [blame]	901	'recommended for repair (default: no '
				902	'recommendation)'))
J. Richard Barnette	02e8243	2015-10-13 16:02:47 -0700	[diff] [blame]	903	parser.add_argument('--debug', action='store_true',
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	904	help='Print e-mail messages on stdout '
				905	'without sending them.')
				906	parser.add_argument('--logdir', default=_get_logdir(argv[0]),
				907	help='Directory where logs will be written.')
				908	parser.add_argument('boardnames', nargs='*',
				909	metavar='BOARD',
				910	help='names of boards to report on '
				911	'(default: all boards)')
				912	arguments = parser.parse_args(argv[1:])
J. Richard Barnette	02e8243	2015-10-13 16:02:47 -0700	[diff] [blame]	913	if not _verify_arguments(arguments):
				914	return None
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	915	return arguments
				916
				917
				918	def _configure_logging(arguments):
				919	"""Configure the `logging` module for our needs.
				920
				921	How we log depends on whether the `--print` option was
				922	provided on the command line. Without the option, we log all
				923	messages at DEBUG level or above, and write them to a file in
				924	the directory specified by the `--logdir` option. With the
				925	option, we write log messages to stdout; messages below INFO
				926	level are discarded.
				927
				928	The log file is configured to rotate once a week on Friday
				929	evening, preserving ~3 months worth of history.
				930
				931	@param arguments Command-line arguments as returned by
				932	`ArgumentParser`
				933
				934	"""
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	935	root_logger = logging.getLogger()
J. Richard Barnette	02e8243	2015-10-13 16:02:47 -0700	[diff] [blame]	936	if arguments.debug:
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	937	root_logger.setLevel(logging.INFO)
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	938	handler = logging.StreamHandler(sys.stdout)
				939	handler.setFormatter(logging.Formatter())
				940	else:
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	941	root_logger.setLevel(logging.DEBUG)
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	942	logfile = os.path.join(arguments.logdir, _LOGFILE)
				943	handler = logging.handlers.TimedRotatingFileHandler(
				944	logfile, when='W4', backupCount=13)
				945	formatter = logging.Formatter(_LOG_FORMAT,
				946	time_utils.TIME_FMT)
				947	handler.setFormatter(formatter)
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	948	# TODO(jrbarnette) This is gross. Importing client.bin.utils
				949	# implicitly imported logging_config, which calls
				950	# logging.basicConfig() at module level. That gives us an
				951	# extra logging handler that we don't want. So, clear out all
				952	# the handlers here.
				953	for h in root_logger.handlers:
				954	root_logger.removeHandler(h)
				955	root_logger.addHandler(handler)
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	956
				957
				958	def _populate_board_counts(inventory):
				959	"""Gather board counts while providing interactive feedback.
				960
				961	Gathering the status of all individual DUTs in the lab can take
				962	considerable time (~30 minutes at the time of this writing).
				963
				964	Normally, we pay that cost by querying as we go. However, with
				965	the `--print` option, a human being may be watching the
				966	progress. So, we force the first (expensive) queries to happen
				967	up front, and provide a small ASCII progress bar to give an
				968	indicator of how many boards have been processed.
				969
				970	@param inventory _LabInventory object with the inventory to
				971	be gathered.
				972
				973	"""
				974	n = 0
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	975	total_broken = 0
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	976	for counts in inventory.values():
				977	n += 1
				978	if n % 10 == 5:
				979	c = '+'
				980	elif n % 10 == 0:
				981	c = '%d' % ((n / 10) % 10)
				982	else:
				983	c = '.'
				984	sys.stdout.write(c)
				985	sys.stdout.flush()
				986	# This next call is where all the time goes - it forces all
				987	# of a board's HostJobHistory objects to query the database
				988	# and cache their results.
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	989	total_broken += counts.get_broken()
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	990	sys.stdout.write('\n')
J. Richard Barnette	f683928	2015-06-01 16:00:35 -0700	[diff] [blame]	991	sys.stdout.write('Found %d broken DUTs\n' % total_broken)
J. Richard Barnette	96db349	2015-03-27 17:23:52 -0700	[diff] [blame]	992
				993
def main(argv):
    """Standard main routine.

    Parses the command line, configures logging, builds the lab
    inventory for the requested time window, and then generates and
    sends whichever of the board and pool reports have a non-empty
    notification list.

    Exits with status 1 if the command line can't be parsed into
    valid arguments.  A keyboard interrupt during the run is
    silently ignored; any other exception is logged with a
    traceback and not re-raised.

    @param argv  Command line arguments including `sys.argv[0]`.
    """
    arguments = _parse_command(argv)
    if not arguments:
        sys.exit(1)
    _configure_logging(arguments)
    try:
        # The reporting window ends now, and reaches back
        # `--duration` hours.
        end_time = int(time.time())
        start_time = end_time - arguments.duration * 60 * 60
        # The timestamp (to the hour) is used in both the e-mail
        # subject lines and the names of the logged report files.
        timestamp = time.strftime('%Y-%m-%d.%H',
                                  time.localtime(end_time))
        logging.debug('Starting lab inventory for %s', timestamp)
        if arguments.board_notify:
            if arguments.recommend:
                logging.debug('Will include repair recommendations')
            logging.debug('Will include board inventory')
        if arguments.pool_notify:
            logging.debug('Will include pool inventory')

        afe = frontend_wrappers.RetryingAFE(server=None)
        inventory = _LabInventory.create_inventory(
                afe, start_time, end_time, arguments.boardnames)
        logging.info('Found %d hosts across %d boards',
                     inventory.get_num_duts(),
                     inventory.get_num_boards())

        # In debug mode, gather all the counts up front while
        # showing a progress bar on stdout.
        if arguments.debug:
            _populate_board_counts(inventory)

        if arguments.board_notify:
            # The optional repair recommendation goes ahead of the
            # board inventory in the same message.
            if arguments.recommend:
                recommend_message = _generate_repair_recommendation(
                        inventory, arguments.recommend) + '\n\n\n'
            else:
                recommend_message = ''
            board_message = _generate_board_inventory_message(inventory)
            _send_email(arguments,
                        'boards-%s.txt' % timestamp,
                        'DUT board inventory %s' % timestamp,
                        arguments.board_notify,
                        recommend_message + board_message)

        if arguments.pool_notify:
            _send_email(arguments,
                        'pools-%s.txt' % timestamp,
                        'DUT pool inventory %s' % timestamp,
                        arguments.pool_notify,
                        _generate_pool_inventory_message(inventory))
    except KeyboardInterrupt:
        # An interrupted run just stops quietly.
        pass
    except EnvironmentError as e:
        logging.exception('Unexpected OS error: %s', e)
    except Exception as e:
        logging.exception('Unexpected exception: %s', e)
				1050
				1051
def get_managed_boards(afe):
    """Return the managed boards found in a fresh lab inventory.

    Creates a `_LabInventory` covering the 24 hours ending now, and
    returns the result of its `get_managed_boards()` method.

    @param afe  AFE object passed through to
                `_LabInventory.create_inventory()`.
    @return Whatever `_LabInventory.get_managed_boards()` returns.

    """
    now = int(time.time())
    one_day_ago = now - 24 * 60 * 60
    return _LabInventory.create_inventory(
            afe, one_day_ago, now).get_managed_boards()
				1058
				1059
# Run the inventory only when executed as a script; importing this
# module (e.g. to call get_managed_boards()) has no side effects here.
if __name__ == '__main__':
    main(sys.argv)