blob: 3738cef25c247c619c6b5281cbaa2b19a9ec1576 [file] [log] [blame]
J. Richard Barnette96db3492015-03-27 17:23:52 -07001#!/usr/bin/env python
2# Copyright 2015 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Create e-mail reports of the Lab's DUT inventory.
7
8Gathers a list of all DUTs of interest in the Lab, segregated by
9board and pool, and determines whether each DUT is working or
10broken. Then, send one or more e-mail reports summarizing the
11status to e-mail addresses provided on the command line.
12
13usage: lab_inventory.py [ options ] [ board ... ]
14
15Options:
16--duration / -d <hours>
17 How far back in time to search job history to determine DUT
18 status.
19
20--board-notify <address>[,<address>]
21 Send the "board status" e-mail to all the specified e-mail
22 addresses.
23
24--pool-notify <address>[,<address>]
25 Send the "pool status" e-mail to all the specified e-mail
26 addresses.
27
J. Richard Barnette1df6a562015-06-09 10:06:17 -070028--recommend <number>
    When generating the "board status" e-mail, include a list of
30 <number> specific DUTs to be recommended for repair.
31
J. Richard Barnette96db3492015-03-27 17:23:52 -070032--logdir <directory>
33 Log progress and actions in a file under this directory. Text
34 of any e-mail sent will also be logged in a timestamped file in
35 this directory.
36
J. Richard Barnette02e82432015-10-13 16:02:47 -070037--debug
J. Richard Barnette96db3492015-03-27 17:23:52 -070038 Suppress all logging and sending e-mail. Instead, write the
39 output that would be generated onto stdout.
40
41<board> arguments:
42 With no arguments, gathers the status for all boards in the lab.
43 With one or more named boards on the command line, restricts
44 reporting to just those boards.
45
46"""
47
48
49import argparse
50import logging
51import logging.handlers
52import os
J. Richard Barnettef6839282015-06-01 16:00:35 -070053import re
J. Richard Barnette96db3492015-03-27 17:23:52 -070054import sys
55import time
56
57import common
J. Richard Barnettef6839282015-06-01 16:00:35 -070058from autotest_lib.client.bin import utils
J. Richard Barnette96db3492015-03-27 17:23:52 -070059from autotest_lib.client.common_lib import time_utils
J. Richard Barnettea7c514e2015-09-15 11:13:23 -070060from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
J. Richard Barnettef6839282015-06-01 16:00:35 -070061from autotest_lib.server.hosts import servo_host
Aviv Keshet7ee95862016-08-30 15:18:27 -070062from autotest_lib.server.lib import status_history
J. Richard Barnette96db3492015-03-27 17:23:52 -070063from autotest_lib.site_utils import gmail_lib
J. Richard Barnette96db3492015-03-27 17:23:52 -070064from autotest_lib.site_utils.suite_scheduler import constants
65
66
# Pool names, re-exported from the suite scheduler's `constants.Pools`
# so the rest of this module can refer to them by short names.
CRITICAL_POOLS = constants.Pools.CRITICAL_POOLS
SPARE_POOL = constants.Pools.SPARE_POOL
MANAGED_POOLS = constants.Pools.MANAGED_POOLS

# _EXCLUDED_LABELS - A set of labels that disqualify a DUT from
#     monitoring by this script.  Currently, we're excluding any
#     'adb' host, because we're not ready to monitor Android or
#     Brillo hosts.

_EXCLUDED_LABELS = set(['adb'])

# _DEFAULT_DURATION:
#     Default value used for the --duration command line option,
#     in hours.  Specifies how far back in time to search in order
#     to determine DUT status.

_DEFAULT_DURATION = 24

# _LOGDIR:
#     Relative path used in the calculation of the default setting
#     for the --logdir option.  The full path is relative to
#     the root of the autotest directory, as determined from
#     sys.argv[0].
# _LOGFILE:
#     Basename of a file to which general log information will be
#     written.
# _LOG_FORMAT:
#     Format string for log messages.

_LOGDIR = os.path.join('logs', 'dut-data')
_LOGFILE = 'lab-inventory.log'
_LOG_FORMAT = '%(asctime)s | %(levelname)-10s | %(message)s'

# Pattern describing location-based host names in the Chrome OS test
# labs.  Each DUT hostname designates the DUT's location:
#   * A lab (room) that's physically separated from other labs
#     (i.e. there's a door).
#   * A row (or aisle) of DUTs within the lab.
#   * A vertical rack of shelves on the row.
#   * A specific host on one shelf of the rack.
# The four capture groups are, in order: lab name, row number, rack
# number and host number.

_HOSTNAME_PATTERN = re.compile(
        r'(chromeos\d+)-row(\d+)-rack(\d+)-host(\d+)')

# Default entry for managed pools.  Used as the default `pool`
# argument of `_LabInventory.get_managed_boards()`, where it selects
# the "boards with both spare and non-spare DUTs" calculation rather
# than a lookup for one named pool.

_MANAGED_POOL_DEFAULT = 'all_pools'
114
J. Richard Barnette96db3492015-03-27 17:23:52 -0700115
class _PoolCounts(object):
    """Collection of `HostJobHistory` objects belonging to one pool.

    The histories recorded here are nominally all members of a single
    scheduling pool of DUTs.  The collection can report the working
    DUTs, the broken DUTs, the idle DUTs, and the total number of
    DUTs in the pool.

    Performance note:  These methods are potentially expensive:
      * `get_working()`
      * `get_working_list()`
      * `get_broken()`
      * `get_broken_list()`
      * `get_idle()`
      * `get_idle_list()`
    The first call to any of them triggers multiple RPC calls with
    relatively expensive database queries.  The query results are
    cached inside the individual `HostJobHistory` objects, so only
    the first call pays the full cost.

    Separately from that RPC caching, `get_working_list()`,
    `get_broken_list()` and `get_idle_list()` remember the list they
    computed, so the filtering isn't repeated on every call.

    The RPC cost is deliberately deferred to the accessor methods
    (instead of being paid in `record_host()`), so that a complete
    `_LabInventory` can be built without making the expensive queries
    at creation time.  `_populate_board_counts()`, below, assumes
    this behavior.

    """

    def __init__(self):
        self._histories = []
        # Cached results of the list accessors; `None` means "not
        # calculated since the last change to the pool".
        self._working_list = None
        self._broken_list = None
        self._idle_list = None


    def record_host(self, host_history):
        """Add one `HostJobHistory` object to the collection.

        Recording a host discards any previously cached lists.

        @param host_history The `HostJobHistory` object to be
                            remembered.

        """
        self._working_list = self._broken_list = self._idle_list = None
        self._histories.append(host_history)


    def get_working_list(self):
        """Return a list of all working DUTs in the pool.

        Selects the histories in `self._histories` whose last
        diagnosis is `WORKING`.  The result is cached, so it's only
        calculated once.

        @return A list of HostJobHistory objects.

        """
        if self._working_list is not None:
            return self._working_list
        working = []
        for history in self._histories:
            if history.last_diagnosis()[0] == status_history.WORKING:
                working.append(history)
        self._working_list = working
        return working


    def get_working(self):
        """Return the number of working DUTs in the pool."""
        return len(self.get_working_list())


    def get_broken_list(self):
        """Return a list of all broken DUTs in the pool.

        Selects the histories in `self._histories` whose last
        diagnosis is `BROKEN`.  The result is cached, so it's only
        calculated once.

        @return A list of HostJobHistory objects.

        """
        if self._broken_list is not None:
            return self._broken_list
        broken = []
        for history in self._histories:
            if history.last_diagnosis()[0] == status_history.BROKEN:
                broken.append(history)
        self._broken_list = broken
        return broken


    def get_broken(self):
        """Return the number of broken DUTs in the pool."""
        return len(self.get_broken_list())


    def get_idle_list(self):
        """Return a list of all idle DUTs in the pool.

        Selects the histories in `self._histories` whose last
        diagnosis is `UNUSED` or `UNKNOWN`.  The result is cached, so
        it's only calculated once.

        @return A list of HostJobHistory objects.

        """
        idle_states = (status_history.UNUSED, status_history.UNKNOWN)
        if self._idle_list is not None:
            return self._idle_list
        idle = []
        for history in self._histories:
            if history.last_diagnosis()[0] in idle_states:
                idle.append(history)
        self._idle_list = idle
        return idle


    def get_idle(self):
        """Return the number of idle DUTs in the pool."""
        return len(self.get_idle_list())


    def get_total(self):
        """Return the total number of DUTs in the pool."""
        return len(self._histories)
240
241
class _BoardCounts(object):
    """Maintains a set of `HostJobHistory` objects for a board.

    The collected history objects are nominally all of the same
    board.  The collection maintains a count of working DUTs, a
    count of broken DUTs, a count of idle DUTs, and a total count.
    The counts and lists can be obtained either for a single pool,
    or as a total across all pools.

    DUTs in the collection must be assigned to one of the pools
    in `MANAGED_POOLS`.

    The accessor methods rely on the methods of the same name in
    _PoolCounts, so the performance note in _PoolCounts applies
    here as well.

    """

    def __init__(self):
        self._pools = {
            pool: _PoolCounts() for pool in MANAGED_POOLS
        }


    def record_host(self, host_history):
        """Add one `HostJobHistory` object to the collection.

        The host is filed under `host_history.host_pool`; a
        `KeyError` is raised if that pool isn't one of the pools
        this object was created with.

        @param host_history The `HostJobHistory` object to be
                            remembered.

        """
        pool = host_history.host_pool
        self._pools[pool].record_host(host_history)


    def _count_pool(self, get_pool_count, pool=None):
        """Internal helper to count hosts in a given pool.

        The `get_pool_count` parameter is a function to calculate
        the exact count of interest for the pool.

        @param get_pool_count  Function to return a count from a
                               _PoolCount object.
        @param pool            The pool to be counted.  If `None`,
                               return the total across all pools.

        """
        if pool is None:
            return sum(get_pool_count(counts)
                       for counts in self._pools.values())
        return get_pool_count(self._pools[pool])


    def _list_pool(self, get_pool_list, pool=None):
        """Internal helper to list hosts in a given pool.

        Always returns a newly built list, so callers may modify
        the result without disturbing the lists cached by the
        per-pool objects.

        @param get_pool_list  Function to return a list of
                              HostJobHistory objects from a
                              _PoolCount object.
        @param pool           The pool to be listed.  If `None`,
                              return the hosts across all pools.

        """
        if pool is None:
            selected = self._pools.values()
        else:
            selected = [self._pools[pool]]
        duts = []
        for counts in selected:
            duts.extend(get_pool_list(counts))
        return duts


    def get_working_list(self, pool=None):
        """Return a list of all working DUTs for the board.

        Go through all HostJobHistory objects in the selected
        pool(s), selecting the ones where the last diagnosis is
        `WORKING`.

        @param pool The pool to be listed.  If `None`, return the
                    list across all pools.

        @return A list of HostJobHistory objects.

        """
        return self._list_pool(
                lambda counts: counts.get_working_list(), pool)


    def get_working(self, pool=None):
        """Return the number of working DUTs in a pool.

        @param pool The pool to be counted.  If `None`, return the
                    total across all pools.

        @return The total number of working DUTs in the selected
                pool(s).
        """
        return self._count_pool(lambda counts: counts.get_working(), pool)


    def get_broken_list(self, pool=None):
        """Return a list of all broken DUTs for the board.

        Go through all HostJobHistory objects in the selected
        pool(s), selecting the ones where the last diagnosis is
        `BROKEN`.

        @param pool The pool to be listed.  If `None`, return the
                    list across all pools.

        @return A list of HostJobHistory objects.

        """
        return self._list_pool(
                lambda counts: counts.get_broken_list(), pool)


    def get_broken(self, pool=None):
        """Return the number of broken DUTs in a pool.

        @param pool The pool to be counted.  If `None`, return the
                    total across all pools.

        @return The total number of broken DUTs in the selected pool(s).
        """
        return self._count_pool(lambda counts: counts.get_broken(), pool)


    def get_idle_list(self, pool=None):
        """Return a list of all idle DUTs for the board.

        Go through all HostJobHistory objects in the selected
        pool(s), selecting the ones where the last diagnosis is
        `UNUSED` or `UNKNOWN`.

        @param pool: The pool to be listed.  If `None`, return the
                     list across all pools.

        @return A list of HostJobHistory objects.

        """
        return self._list_pool(
                lambda counts: counts.get_idle_list(), pool)


    def get_idle(self, pool=None):
        """Return the number of idle DUTs in a pool.

        @param pool: The pool to be counted. If `None`, return the total
                     across all pools.

        @return The total number of idle DUTs in the selected pool(s).
        """
        return self._count_pool(lambda counts: counts.get_idle(), pool)


    def get_spares_buffer(self):
        """Return the nominal number of working spares.

        Calculates and returns how many working spares there would
        be in the spares pool if all broken DUTs were in the spares
        pool.  This number may be negative, indicating a shortfall
        in the critical pools.

        @return The total number DUTs in the spares pool, less the total
                number of broken DUTs in all pools.
        """
        return self.get_total(SPARE_POOL) - self.get_broken()


    def get_total(self, pool=None):
        """Return the total number of DUTs in a pool.

        @param pool The pool to be counted.  If `None`, return the
                    total across all pools.

        @return The total number of DUTs in the selected pool(s).
        """
        return self._count_pool(lambda counts: counts.get_total(), pool)
403
404
class _LabInventory(dict):
    """Collection of `HostJobHistory` objects for the Lab's inventory.

    The collection is indexed by board.  Indexing returns the
    _BoardCounts object associated with the board.

    The collection is also iterable.  The iterator returns all the
    boards in the inventory, in unspecified order.

    """

    @staticmethod
    def _eligible_host(afehost):
        """Return whether this host is eligible for monitoring.

        Hosts with any label that's in `_EXCLUDED_LABELS` aren't
        eligible.

        @param afehost  The host to be tested for eligibility.
        @return `True` if none of the host's labels is excluded.
        """
        return _EXCLUDED_LABELS.isdisjoint(afehost.labels)


    @classmethod
    def create_inventory(cls, afe, start_time, end_time, boardlist=None):
        """Return a Lab inventory with specified parameters.

        By default, gathers inventory from `HostJobHistory` objects
        for all DUTs in the `MANAGED_POOLS` list.  If `boardlist`
        is supplied, the inventory will be restricted to only the
        given boards.

        @param afe         AFE object for constructing the
                           `HostJobHistory` objects.
        @param start_time  Start time for the `HostJobHistory`
                           objects.
        @param end_time    End time for the `HostJobHistory`
                           objects.
        @param boardlist   List of boards to include.  If empty or
                           `None`, include all available boards.
        @return A `_LabInventory` object for the specified boards.

        """
        label_list = [constants.Labels.POOL_PREFIX + l
                          for l in MANAGED_POOLS]
        afehosts = afe.get_hosts(labels__name__in=label_list)
        if boardlist:
            # We're deliberately not checking host eligibility in this
            # code path.  This is a debug path, not used in production;
            # it may be useful to include ineligible hosts here.
            boardhosts = []
            for board in boardlist:
                board_label = constants.Labels.BOARD_PREFIX + board
                boardhosts.extend(h for h in afehosts
                                  if board_label in h.labels)
            afehosts = boardhosts
        else:
            afehosts = [h for h in afehosts if cls._eligible_host(h)]
        return cls([status_history.HostJobHistory(afe, host,
                                                  start_time, end_time)
                    for host in afehosts])


    def __init__(self, histories):
        """Build the inventory from a list of `HostJobHistory` objects.

        @param histories  The `HostJobHistory` objects to be filed
                          by board and pool.
        """
        # N.B. The query that finds our hosts is restricted to those
        # with a valid pool: label, but doesn't check for a valid
        # board: label.  In some (insufficiently) rare cases, the
        # AFE hosts table has been known to (incorrectly) have DUTs
        # with a pool: but no board: label.  We explicitly exclude
        # those here.
        histories = [h for h in histories
                     if h.host_board is not None]
        boards = set(h.host_board for h in histories)
        initval = { board: _BoardCounts() for board in boards }
        super(_LabInventory, self).__init__(initval)
        self._dut_count = len(histories)
        # Cache of results from `get_managed_boards()`, keyed by the
        # `pool` argument.
        self._managed_boards = {}
        for h in histories:
            self[h.host_board].record_host(h)


    def get_managed_boards(self, pool=_MANAGED_POOL_DEFAULT):
        """Return the set of "managed" boards.

        Operationally, saying a board is "managed" means that the
        board will be included in the "board" and "repair
        recommendations" reports.  That is, if there are failures in
        the board's inventory then lab techs will be asked to fix
        them without a separate ticket.

        For purposes of implementation, a board is "managed" if it
        has DUTs in both the spare and a non-spare (i.e. critical)
        pool.

        The result for each `pool` value is calculated once and then
        cached; the cached set itself is returned.

        @param pool: The specified pool for managed boards.
        @return A set of all the boards that have both spare and
                non-spare pools, unless the pool is specified,
                then the set of boards in that pool.
        """
        if self._managed_boards.get(pool) is None:
            managed = set()
            for board, counts in self.items():
                # Get the counts for all pools, otherwise get it for the
                # specified pool.
                if pool == _MANAGED_POOL_DEFAULT:
                    spares = counts.get_total(SPARE_POOL)
                    total = counts.get_total()
                    if spares != 0 and spares != total:
                        managed.add(board)
                elif counts.get_total(pool) != 0:
                    managed.add(board)
            self._managed_boards[pool] = managed
        return self._managed_boards[pool]


    def get_num_duts(self):
        """Return the total number of DUTs in the inventory."""
        return self._dut_count


    def get_num_boards(self):
        """Return the total number of boards in the inventory."""
        return len(self)
530
531
def _sort_by_location(inventory_list):
    """Return a list of DUTs, organized by location.

    Take the given list of `HostJobHistory` objects, separate it
    into a list per lab, and sort each lab's list by location.  The
    order of sorting within a lab is
      * By row number within the lab,
      * then by rack number within the row,
      * then by host shelf number within the rack.

    Return a list of the sorted lists.

    DUTs whose hostname doesn't match `_HOSTNAME_PATTERN` have no
    known location, and are omitted from the result.

    Implementation note: host locations are compared as
    `(row, rack, host)` tuples of integers, so there's no limit on
    the size of the individual numbers.

    @param inventory_list  List of `HostJobHistory` objects to be
                           sorted.
    @return A list of sorted lists of DUTs.

    """
    lab_lists = {}
    for history in inventory_list:
        location = _HOSTNAME_PATTERN.match(history.host.hostname)
        if location:
            lab = location.group(1)
            key = tuple(int(idx) for idx in location.group(2, 3, 4))
            lab_lists.setdefault(lab, []).append((key, history))
    return_list = []
    for dut_list in lab_lists.values():
        # Sort on the location only; the history objects themselves
        # are never compared.
        dut_list.sort(key=lambda t: t[0])
        return_list.append([t[1] for t in dut_list])
    return return_list
567
568
def _score_repair_set(buffer_counts, repair_list):
    """Return a numeric score rating a set of DUTs to be repaired.

    `buffer_counts` is a dictionary mapping board names to the
    size of the board's spares buffer.

    `repair_list` is a list of DUTs to be repaired.

    The score is derived from the buffer counts that would result
    if every DUT in `repair_list` were repaired.  Two numbers are
    combined:
      * Worst case buffer count for any board (higher is better).
        This is the more significant number for comparison.
      * Number of boards at the worst case (lower is better).  This
        is the less significant number.

    Implementation note:  The score could fail to reflect the
    intended criteria if there are more than 1000 boards in the
    inventory.

    @param buffer_counts  A dictionary mapping boards to buffer
                          counts.  Must not be empty; an empty
                          dictionary raises `IndexError`.
    @param repair_list    A list of DUTs (`HostJobHistory` objects)
                          to be repaired.
    @return A numeric score.

    """
    _NBOARDS = 1000
    repaired = _LabInventory(repair_list)
    # Buffer count per board after the proposed repairs.  The board
    # names are dropped, since only the counts affect the score.
    new_counts = [
        count + (repaired[board].get_total() if board in repaired else 0)
        for board, count in buffer_counts.items()]
    # Find the worst (smallest) count and how many boards share it.
    worst_count = new_counts[0]
    num_worst = 0
    for count in new_counts:
        if count < worst_count:
            worst_count = count
            num_worst = 1
        elif count == worst_count:
            num_worst += 1
    return _NBOARDS * worst_count - num_worst
619
620
def _generate_repair_recommendation(inventory, num_recommend):
    """Return a summary of selected DUTs needing repair.

    Returns a message recommending a list of broken DUTs to be
    repaired.  The list of DUTs is selected based on these
    criteria:
      * No more than `num_recommend` DUTs will be listed.
      * All DUTs must be in the same lab.
      * DUTs should be selected for some degree of physical
        proximity.
      * DUTs for boards with a low spares buffer are more important
        than DUTs with larger buffers.

    The algorithm used will guarantee that at least one DUT from a
    board with the smallest spares buffer will be recommended.  If
    the worst spares buffer number is shared by more than one board,
    the algorithm will tend to prefer repair sets that include more
    of those boards over sets that cover fewer boards.

    If no managed board has a broken DUT with a recognizable
    location, the message contains only the heading lines.

    @param inventory      Inventory for generating recommendations.
    @param num_recommend  Number of DUTs to recommend for repair.
    @return String with the recommendation message.

    """
    logging.debug('Creating DUT repair recommendations')
    board_buffer_counts = {}
    broken_list = []
    for board in inventory.get_managed_boards():
        logging.debug('Listing failed DUTs for %s', board)
        counts = inventory[board]
        if counts.get_broken() != 0:
            board_buffer_counts[board] = counts.get_spares_buffer()
            broken_list.extend(counts.get_broken_list())
    # N.B. The logic inside this loop may seem complicated, but
    # simplification is hard:
    #   * Calculating an initial recommendation outside of
    #     the loop likely would make things more complicated,
    #     not less.
    #   * It's necessary to calculate an initial lab slice once per
    #     lab _before_ the while loop, in case the number of broken
    #     DUTs in a lab is less than `num_recommend`.
    #
    # `recommendation` starts out empty rather than `None`, so that
    # the message can still be built when there are no broken DUTs.
    recommendation = []
    best_score = None
    for lab_duts in _sort_by_location(broken_list):
        start = 0
        end = num_recommend
        lab_slice = lab_duts[start : end]
        lab_score = _score_repair_set(board_buffer_counts,
                                      lab_slice)
        # Slide a window of `num_recommend` DUTs along the lab's
        # location-sorted list, keeping the best scoring window.
        while end < len(lab_duts):
            start += 1
            end += 1
            new_slice = lab_duts[start : end]
            new_score = _score_repair_set(board_buffer_counts,
                                          new_slice)
            if new_score > lab_score:
                lab_slice = new_slice
                lab_score = new_score
        if best_score is None or lab_score > best_score:
            recommendation = lab_slice
            best_score = lab_score
    # N.B. The trailing space here is manadatory:  Without it, Gmail
    # will parse the URL wrong.  Don't ask.  If you simply _must_
    # know more, go try it yourself...
    line_fmt = '%-30s %-16s %-6s\n %s '
    message = ['Repair recommendations:\n',
               line_fmt % ( 'Hostname', 'Board', 'Servo?', 'Logs URL')]
    for h in recommendation:
        servo_name = servo_host.make_servo_hostname(h.host.hostname)
        servo_present = utils.host_is_in_lab_zone(servo_name)
        _, event = h.last_diagnosis()
        line = line_fmt % (
                h.host.hostname, h.host_board,
                'Yes' if servo_present else 'No', event.job_url)
        message.append(line)
    return '\n'.join(message)
696
697
def _generate_board_inventory_message(inventory):
    """Generate the "board inventory" e-mail message.

    The board inventory is a list by board summarizing the number
    of working and broken DUTs, and the total shortfall or surplus
    of working devices relative to the minimum critical pool
    requirement.

    The report omits boards with no DUTs in the spare pool or with
    no DUTs in a critical pool.  Only boards with at least one
    broken DUT get a line in the per-board table; all managed
    boards contribute to the summary totals.

    If the managed boards contain no DUTs at all, every percentage
    in the summary is reported as 0.

    N.B. For sample output text formattted as users can expect to
    see it in e-mail and log files, refer to the unit tests.

    @param inventory  _LabInventory object with the inventory to
                      be reported on.
    @return String with the inventory message to be sent.

    """
    logging.debug('Creating board inventory')
    nworking = 0
    nbroken = 0
    nidle = 0
    nbroken_boards = 0
    ntotal_boards = 0
    summaries = []
    for board in inventory.get_managed_boards():
        counts = inventory[board]
        logging.debug('Counting %2d DUTS for board %s',
                      counts.get_total(), board)
        # Summary elements laid out in the same order as the text
        # headers:
        #     Board Avail   Bad  Idle  Good  Spare Total
        #      e[0]  e[1]  e[2]  e[3]  e[4]  e[5]  e[6]
        element = (board,
                   counts.get_spares_buffer(),
                   counts.get_broken(),
                   counts.get_idle(),
                   counts.get_working(),
                   counts.get_total(SPARE_POOL),
                   counts.get_total())
        if element[2]:
            summaries.append(element)
            nbroken_boards += 1
        ntotal_boards += 1
        nbroken += element[2]
        nidle += element[3]
        nworking += element[4]
    ntotal = nworking + nbroken + nidle
    # Worst spares buffer first; among equal buffers, most broken
    # DUTs first.
    summaries = sorted(summaries, key=lambda e: (e[1], -e[2]))
    if ntotal:
        broken_percent = int(round(100.0 * nbroken / ntotal))
        idle_percent = int(round(100.0 * nidle / ntotal))
        # Derived by subtraction so the three figures add up to 100.
        working_percent = 100 - broken_percent - idle_percent
    else:
        # Nothing to report on; avoid dividing by zero.
        broken_percent = idle_percent = working_percent = 0
    message = ['Summary of DUTs in inventory:',
               '%10s %10s %10s %6s' % ('Bad', 'Idle', 'Good', 'Total'),
               '%5d %3d%% %5d %3d%% %5d %3d%% %6d' % (
                   nbroken, broken_percent,
                   nidle, idle_percent,
                   nworking, working_percent,
                   ntotal),
               '',
               'Boards with failures: %d' % nbroken_boards,
               'Boards in inventory: %d' % ntotal_boards,
               '', '',
               'Full board inventory:\n',
               '%-22s %5s %5s %5s %5s %5s %5s' % (
                   'Board', 'Avail', 'Bad', 'Idle', 'Good',
                   'Spare', 'Total')]
    message.extend(
            ['%-22s %5d %5d %5d %5d %5d %5d' % e for e in summaries])
    return '\n'.join(message)
769
770
# Preamble placed at the top of the "pool inventory" e-mail message.
_POOL_INVENTORY_HEADER = '''\
Notice to Infrastructure deputies: All boards shown below are at
less than full strength, please take action to resolve the issues.
Once you're satisfied that failures won't recur, failed DUTs can
be replaced with spares by running `balance_pool`. Detailed
instructions can be found here:
    http://go/cros-manage-duts
'''
779
780
def _generate_pool_inventory_message(inventory):
    """Generate the "pool inventory" e-mail message.

    For each pool in `CRITICAL_POOLS`, the message lists the boards
    that are below full strength in that pool, i.e. boards with at
    least one broken or idle DUT.  Each row shows the board's broken,
    idle, working, and total DUT counts for the pool; rows are
    ordered with the largest broken count first.

    N.B. For sample output text formatted as users can expect to
    see it in e-mail and log files, refer to the unit tests.

    @param inventory  _LabInventory object with the inventory to
                      be reported on.
    @return String with the inventory message to be sent.

    """
    logging.debug('Creating pool inventory')
    lines = [_POOL_INVENTORY_HEADER]
    for index, pool in enumerate(CRITICAL_POOLS):
        # Every pool section after the first is set off by a blank line.
        separator = '\n' if index else ''
        lines.append(
            '%sStatus for pool:%s, by board:' % (separator, pool))
        lines.append(
            '%-20s %5s %5s %5s %5s' % (
                'Board', 'Bad', 'Idle', 'Good', 'Total'))
        rows = []
        for board, counts in inventory.items():
            logging.debug('Counting %2d DUTs for %s, %s',
                          counts.get_total(pool), board, pool)
            num_broken = counts.get_broken(pool)
            num_idle = counts.get_idle(pool)
            # Boards at full strength are not reported.
            if num_broken or num_idle:
                num_working = counts.get_working(pool)
                num_total = counts.get_total(pool)
                rows.append((board, num_broken, num_idle,
                             num_working, num_total))
        if not rows:
            lines.append('(All boards at full strength)')
            continue
        rows.sort(key=lambda row: -row[1])
        for row in rows:
            lines.append('%-20s %5d %5d %5d %5d' % row)
    return '\n'.join(lines)
825
826
# Preamble placed at the top of the "idle inventory" e-mail section.
_IDLE_INVENTORY_HEADER = (
    "Notice to Infrastructure deputies: The hosts shown below haven't\n"
    "run any jobs for at least 24 hours. Please check each host; locked\n"
    "hosts should normally be unlocked; stuck jobs should normally be\n"
    "aborted.\n"
)
833
834
def _generate_idle_inventory_message(inventory):
    """Generate the "idle inventory" e-mail message.

    The idle inventory is a list of hostnames, each shown with its
    board and pool, covering every DUT returned by `get_idle_list()`
    for each board in each pool of `MANAGED_POOLS`.

    N.B. For sample output text formatted as users can expect to
    see it in e-mail and log files, refer to the unit tests.

    @param inventory  _LabInventory object with the inventory to
                      be reported on.
    @return String with the inventory message to be sent.

    """
    logging.debug('Creating idle inventory')
    lines = [_IDLE_INVENTORY_HEADER,
             'Idle Host List:',
             '%-30s %-20s %s' % ('Hostname', 'Board', 'Pool')]
    idle_hosts = []
    for pool in MANAGED_POOLS:
        for board, counts in inventory.items():
            logging.debug('Counting %2d DUTs for %s, %s',
                          counts.get_total(pool), board, pool)
            for dut in counts.get_idle_list(pool):
                idle_hosts.append((dut.host.hostname, board, pool))
    if not idle_hosts:
        lines.append('(No idle DUTs)')
    else:
        for entry in idle_hosts:
            lines.append('%-30s %-20s %s' % entry)
    return '\n'.join(lines)
865
866
def _send_email(arguments, tag, subject, recipients, body):
    """Send an inventory e-mail message.

    The message is logged in the selected log directory using `tag`
    for the file name.

    If the `--debug` option was requested, the message is neither
    logged nor sent, but merely printed on stdout.

    Failures to write the log copy or to send the message are
    logged as errors; they are not raised to the caller.

    @param arguments   Parsed command-line options.
    @param tag         Tag identifying the inventory for logging
                       purposes.
    @param subject     E-mail Subject: header line.
    @param recipients  E-mail addresses for the To: header line.
    @param body        E-mail message body.

    """
    logging.debug('Generating email: "%s"', subject)
    all_recipients = ', '.join(recipients)
    report_body = '\n'.join([
            'To: %s' % all_recipients,
            'Subject: %s' % subject,
            '', body, ''])
    if arguments.debug:
        # Parenthesized single argument: same output under the
        # Python 2 print statement and the Python 3 function.
        print(report_body)
        return
    filename = os.path.join(arguments.logdir, tag)
    try:
        # The context manager closes the file even if write() fails.
        with open(filename, 'w') as report_file:
            report_file.write(report_body)
    except EnvironmentError as e:
        logging.error('Failed to write %s: %s', filename, e)
    # A failure to save the log copy must not prevent sending.
    try:
        gmail_lib.send_email(all_recipients, subject, body)
    except Exception as e:
        logging.error('Failed to send e-mail to %s: %s',
                      all_recipients, e)
905
906
def _separate_email_addresses(address_list):
    """Parse a list of comma-separated lists of e-mail addresses.

    Surrounding whitespace is stripped from each address.  Empty
    entries, such as those produced by a trailing or doubled comma
    or by an empty argument, are dropped so that they never show up
    as blank recipients.

    @param address_list  A list of strings containing comma
                         separated e-mail addresses.
    @return A list of the individual, non-empty e-mail addresses,
            in the order given.

    """
    newlist = []
    for arg in address_list:
        for email in arg.split(','):
            email = email.strip()
            if email:
                newlist.append(email)
    return newlist
919
920
def _verify_arguments(arguments):
    """Validate command-line arguments.

    The comma separated e-mail addresses given in separate
    `--board-notify` and `--pool-notify` option arguments are
    flattened into one list per option, stored back on `arguments`.

    For non-debug uses, notification must be requested for at least
    one report.  For debug, if no notification is specified, that is
    treated as "run all the reports."

    The return value indicates success or failure; in the case of
    failure, an error message is also written to stderr.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`
    @return True if the arguments are semantically good, or False
            if the arguments don't meet requirements.

    """
    arguments.board_notify = _separate_email_addresses(
            arguments.board_notify)
    arguments.pool_notify = _separate_email_addresses(
            arguments.pool_notify)
    if arguments.board_notify or arguments.pool_notify:
        return True
    if arguments.debug:
        # We want to run all the reports.  An empty notify list
        # will cause a report to be skipped, so make sure the
        # lists are non-empty.
        arguments.board_notify = ['']
        arguments.pool_notify = ['']
        return True
    sys.stderr.write('Must specify at least one of '
                     '--board-notify or --pool-notify\n')
    return False
J. Richard Barnette96db3492015-03-27 17:23:52 -0700956
957
def _get_logdir(script):
    """Get the default directory for the `--logdir` option.

    The default is the `_LOGDIR` entry inside the parent of the
    directory that holds this script.

    @param script  Path to this script file.
    @return A path to a directory.

    """
    script_dir = os.path.dirname(os.path.abspath(script))
    parent_dir = os.path.dirname(script_dir)
    return os.path.join(parent_dir, _LOGDIR)
971
972
def _parse_command(argv):
    """Parse the command line arguments.

    Build the argument parser for this command's syntax, parse the
    command line, and validate the result with `_verify_arguments()`.

    @param argv  Standard command line argument vector; argv[0] is
                 assumed to be the command name.
    @return Result returned by ArgumentParser.parse_args(), or None
            if the parsed arguments fail validation.

    """
    parser = argparse.ArgumentParser(
            prog=argv[0],
            description='Gather and report lab inventory statistics')
    parser.add_argument(
            '-d', '--duration',
            type=int,
            default=_DEFAULT_DURATION,
            metavar='HOURS',
            help=('number of hours back to search for status'
                  ' (default: %d)' % _DEFAULT_DURATION))
    parser.add_argument(
            '--board-notify',
            action='append',
            default=[],
            metavar='ADDRESS',
            help=('Generate board inventory message, '
                  'and send it to the given e-mail address(es)'))
    parser.add_argument(
            '--pool-notify',
            action='append',
            default=[],
            metavar='ADDRESS',
            help=('Generate pool inventory message, '
                  'and send it to the given address(es)'))
    parser.add_argument(
            '-r', '--recommend',
            type=int,
            default=None,
            help=('Specify how many DUTs should be '
                  'recommended for repair (default: no '
                  'recommendation)'))
    parser.add_argument(
            '--debug',
            action='store_true',
            help=('Print e-mail messages on stdout '
                  'without sending them.'))
    parser.add_argument(
            '--logdir',
            default=_get_logdir(argv[0]),
            help='Directory where logs will be written.')
    parser.add_argument(
            'boardnames',
            nargs='*',
            metavar='BOARD',
            help=('names of boards to report on '
                  '(default: all boards)'))
    parsed = parser.parse_args(argv[1:])
    return parsed if _verify_arguments(parsed) else None
1017
1018
def _configure_logging(arguments):
    """Configure the `logging` module for our needs.

    How we log depends on whether the `--debug` option was
    provided on the command line.  Without the option, we log all
    messages at DEBUG level or above, and write them to a file in
    the directory specified by the `--logdir` option; the directory
    is created if it does not exist.  With the option, we write log
    messages to stdout; messages below INFO level are discarded.

    The log file is configured to rotate once a week on Friday
    (`when='W4'`), keeping 13 backups for ~3 months worth of
    history.

    Any handlers already installed on the root logger are removed,
    so that ours is the only one.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`

    """
    root_logger = logging.getLogger()
    if arguments.debug:
        root_logger.setLevel(logging.INFO)
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter())
    else:
        if not os.path.exists(arguments.logdir):
            os.mkdir(arguments.logdir)
        root_logger.setLevel(logging.DEBUG)
        logfile = os.path.join(arguments.logdir, _LOGFILE)
        handler = logging.handlers.TimedRotatingFileHandler(
                logfile, when='W4', backupCount=13)
        formatter = logging.Formatter(_LOG_FORMAT,
                                      time_utils.TIME_FMT)
        handler.setFormatter(formatter)
    # TODO(jrbarnette) This is gross.  Importing client.bin.utils
    # implicitly imported logging_config, which calls
    # logging.basicConfig() *at module level*.  That gives us an
    # extra logging handler that we don't want.  So, clear out all
    # the handlers here.
    #
    # Iterate over a copy:  removeHandler() mutates the list, and
    # removing while iterating over it skips every other handler.
    for h in list(root_logger.handlers):
        root_logger.removeHandler(h)
    root_logger.addHandler(handler)
J. Richard Barnette96db3492015-03-27 17:23:52 -07001059
1060
def _populate_board_counts(inventory):
    """Gather board counts while providing interactive feedback.

    Gathering the status of all individual DUTs in the lab can take
    considerable time (~30 minutes at the time of this writing).

    Normally, we pay that cost by querying as we go.  However, with
    the `--debug` option, a human being may be watching the
    progress.  So, we force the first (expensive) queries to happen
    up front, and provide a small ASCII progress bar to give an
    indicator of how many boards have been processed:  one character
    per board, with '+' at every count ending in 5 and the tens
    digit at every multiple of 10.  The total number of broken DUTs
    is printed at the end.

    @param inventory  _LabInventory object with the inventory to
                      be gathered.

    """
    broken_count = 0
    for position, counts in enumerate(inventory.values(), 1):
        last_digit = position % 10
        if last_digit == 0:
            marker = '%d' % ((position // 10) % 10)
        elif last_digit == 5:
            marker = '+'
        else:
            marker = '.'
        sys.stdout.write(marker)
        sys.stdout.flush()
        # This next call is where all the time goes - it forces all
        # of a board's HostJobHistory objects to query the database
        # and cache their results.
        broken_count += counts.get_broken()
    sys.stdout.write('\n')
    sys.stdout.write('Found %d broken DUTs\n' % broken_count)
J. Richard Barnette96db3492015-03-27 17:23:52 -07001095
1096
def main(argv):
    """Standard main routine.

    Parses the command line, configures logging, builds the lab
    inventory, and then generates and sends whichever of the board
    and pool reports were requested.  Exits with status 1 if the
    command line is rejected.  Exceptions raised while gathering or
    reporting are logged rather than propagated; a keyboard
    interrupt is silently ignored.

    @param argv  Command line arguments including `sys.argv[0]`.
    """
    arguments = _parse_command(argv)
    if not arguments:
        sys.exit(1)
    _configure_logging(arguments)
    try:
        end_time = int(time.time())
        start_time = end_time - arguments.duration * 60 * 60
        timestamp = time.strftime('%Y-%m-%d.%H',
                                  time.localtime(end_time))
        logging.debug('Starting lab inventory for %s', timestamp)
        if arguments.board_notify:
            if arguments.recommend:
                logging.debug('Will include repair recommendations')
            logging.debug('Will include board inventory')
        if arguments.pool_notify:
            logging.debug('Will include pool inventory')

        afe = frontend_wrappers.RetryingAFE(server=None)
        inventory = _LabInventory.create_inventory(
                afe, start_time, end_time, arguments.boardnames)
        logging.info('Found %d hosts across %d boards',
                     inventory.get_num_duts(),
                     inventory.get_num_boards())

        if arguments.debug:
            _populate_board_counts(inventory)

        if arguments.board_notify:
            # The repair recommendation, when requested, precedes
            # the board inventory in the same message.
            recommend_message = ''
            if arguments.recommend:
                recommend_message = _generate_repair_recommendation(
                        inventory, arguments.recommend) + '\n\n\n'
            board_message = _generate_board_inventory_message(inventory)
            _send_email(arguments,
                        'boards-%s.txt' % timestamp,
                        'DUT board inventory %s' % timestamp,
                        arguments.board_notify,
                        recommend_message + board_message)

        if arguments.pool_notify:
            # The pool and idle reports share one message.
            pool_message = _generate_pool_inventory_message(inventory)
            idle_message = _generate_idle_inventory_message(inventory)
            _send_email(arguments,
                        'pools-%s.txt' % timestamp,
                        'DUT pool inventory %s' % timestamp,
                        arguments.pool_notify,
                        '\n\n\n'.join([pool_message, idle_message]))
    except KeyboardInterrupt:
        pass
    except EnvironmentError as e:
        logging.exception('Unexpected OS error: %s', e)
    except Exception as e:
        logging.exception('Unexpected exception: %s', e)
1155
1156
def get_inventory(afe):
    """Create a lab inventory covering the most recent 24 hours.

    @param afe  AFE object handed to
                `_LabInventory.create_inventory()`.
    @return The inventory returned by
            `_LabInventory.create_inventory()`.

    """
    now = int(time.time())
    one_day_ago = now - 24 * 60 * 60
    return _LabInventory.create_inventory(afe, one_day_ago, now)
Kevin Chengcf0ad2b2016-04-19 14:51:39 -07001160 return _LabInventory.create_inventory(afe, start_time, end_time)
1161
1162
def get_managed_boards(afe):
    """Return the managed boards from a newly created inventory.

    @param afe  AFE object handed to `get_inventory()`.
    @return The result of the inventory's `get_managed_boards()`.

    """
    inventory = get_inventory(afe)
    return inventory.get_managed_boards()
J. Richard Barnetteaa868932015-10-23 13:28:59 -07001165
1166
# Run the inventory reports only when executed as a script; importing
# this module must not trigger a run.
if __name__ == '__main__':
    main(sys.argv)