blob: e9273ec1ebfcaede1c73473283df0ce5c2d8f6c9 [file] [log] [blame]
J. Richard Barnette96db3492015-03-27 17:23:52 -07001#!/usr/bin/env python
2# Copyright 2015 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Create e-mail reports of the Lab's DUT inventory.
7
8Gathers a list of all DUTs of interest in the Lab, segregated by
Richard Barnette5de01eb2017-12-15 09:53:42 -08009model and pool, and determines whether each DUT is working or
J. Richard Barnette96db3492015-03-27 17:23:52 -070010broken. Then, send one or more e-mail reports summarizing the
11status to e-mail addresses provided on the command line.
12
Richard Barnette5de01eb2017-12-15 09:53:42 -080013usage: lab_inventory.py [ options ] [ model ... ]
J. Richard Barnette96db3492015-03-27 17:23:52 -070014
15Options:
16--duration / -d <hours>
17 How far back in time to search job history to determine DUT
18 status.
19
Richard Barnette5de01eb2017-12-15 09:53:42 -080020--model-notify <address>[,<address>]
21 Send the "model status" e-mail to all the specified e-mail
J. Richard Barnette96db3492015-03-27 17:23:52 -070022 addresses.
23
24--pool-notify <address>[,<address>]
25 Send the "pool status" e-mail to all the specified e-mail
26 addresses.
27
J. Richard Barnette1df6a562015-06-09 10:06:17 -070028--recommend <number>
Richard Barnette5de01eb2017-12-15 09:53:42 -080029 When generating the "model status" e-mail, include a list of
J. Richard Barnette1df6a562015-06-09 10:06:17 -070030 <number> specific DUTs to be recommended for repair.
31
Richard Barnettecf5d8342017-10-24 18:13:11 -070032--repair-loops
33 Scan the inventory for DUTs stuck in repair loops, and report them
34 via a Monarch presence metric.
35
J. Richard Barnette96db3492015-03-27 17:23:52 -070036--logdir <directory>
37 Log progress and actions in a file under this directory. Text
38 of any e-mail sent will also be logged in a timestamped file in
39 this directory.
40
J. Richard Barnette02e82432015-10-13 16:02:47 -070041--debug
Richard Barnettecf5d8342017-10-24 18:13:11 -070042 Suppress all logging, metrics reporting, and sending e-mail.
43 Instead, write the output that would be generated onto stdout.
J. Richard Barnette96db3492015-03-27 17:23:52 -070044
Richard Barnette5de01eb2017-12-15 09:53:42 -080045<model> arguments:
46 With no arguments, gathers the status for all models in the lab.
47 With one or more named models on the command line, restricts
48 reporting to just those models.
J. Richard Barnette96db3492015-03-27 17:23:52 -070049
50"""
51
52
53import argparse
Prathmesh Prabhu021e7842017-11-08 18:05:45 -080054import collections
J. Richard Barnette96db3492015-03-27 17:23:52 -070055import logging
56import logging.handlers
57import os
J. Richard Barnettef6839282015-06-01 16:00:35 -070058import re
J. Richard Barnette96db3492015-03-27 17:23:52 -070059import sys
60import time
61
62import common
J. Richard Barnettef6839282015-06-01 16:00:35 -070063from autotest_lib.client.bin import utils
J. Richard Barnette96db3492015-03-27 17:23:52 -070064from autotest_lib.client.common_lib import time_utils
Xixuan Wu93e646c2017-12-07 18:36:10 -080065from autotest_lib.server import constants
Richard Barnettecf5d8342017-10-24 18:13:11 -070066from autotest_lib.server import site_utils
J. Richard Barnettea7c514e2015-09-15 11:13:23 -070067from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
J. Richard Barnettef6839282015-06-01 16:00:35 -070068from autotest_lib.server.hosts import servo_host
Aviv Keshet7ee95862016-08-30 15:18:27 -070069from autotest_lib.server.lib import status_history
J. Richard Barnette96db3492015-03-27 17:23:52 -070070from autotest_lib.site_utils import gmail_lib
Richard Barnettecf5d8342017-10-24 18:13:11 -070071from chromite.lib import metrics
J. Richard Barnette96db3492015-03-27 17:23:52 -070072
73
# Pool definitions, aliased from `constants.Pools` so the rest of
# this module can refer to them by short names.
CRITICAL_POOLS = constants.Pools.CRITICAL_POOLS
SPARE_POOL = constants.Pools.SPARE_POOL
MANAGED_POOLS = constants.Pools.MANAGED_POOLS

# _EXCLUDED_LABELS - A set of labels that disqualify a DUT from
#     monitoring by this script.  Currently, we're excluding these:
#   + 'adb' - We're not ready to monitor Android or Brillo hosts.
#   + 'board:guado_moblab' - These are maintained by a separate
#     process that doesn't use this script.

_EXCLUDED_LABELS = {'adb', 'board:guado_moblab'}

# _DEFAULT_DURATION:
#     Default value used for the --duration command line option.
#     Specifies how far back in time (in hours) to search in order
#     to determine DUT status.

_DEFAULT_DURATION = 24

# _LOGDIR:
#     Relative path used in the calculation of the default setting for
#     the --logdir option.  The full path is relative to the root of the
#     autotest directory, as determined from sys.argv[0].
# _LOGFILE:
#     Basename of a file to which general log information will be
#     written.
# _LOG_FORMAT:
#     Format string for log messages.

_LOGDIR = os.path.join('logs', 'dut-data')
_LOGFILE = 'lab-inventory.log'
_LOG_FORMAT = '%(asctime)s | %(levelname)-10s | %(message)s'

# Pattern describing location-based host names in the Chrome OS test
# labs.  Each DUT hostname designates the DUT's location:
#   * A lab (room) that's physically separated from other labs
#     (i.e. there's a door).
#   * A row (or aisle) of DUTs within the lab.
#   * A vertical rack of shelves on the row.
#   * A specific host on one shelf of the rack.
# The four capture groups are, in order: lab, row, rack, host.

_HOSTNAME_PATTERN = re.compile(
        r'(chromeos\d+)-row(\d+)-rack(\d+)-host(\d+)')

# _REPAIR_LOOP_THRESHOLD:
#     The number of repeated Repair tasks that must be seen to declare
#     that a DUT is stuck in a repair loop.

_REPAIR_LOOP_THRESHOLD = 4
123
J. Richard Barnette96db3492015-03-27 17:23:52 -0700124
Richard Barnette5de01eb2017-12-15 09:53:42 -0800125class _HostSetInventory(object):
126 """Maintains a set of related `HostJobHistory` objects.
J. Richard Barnette96db3492015-03-27 17:23:52 -0700127
Richard Barnette5de01eb2017-12-15 09:53:42 -0800128 The collection is segregated into disjoint categories of "working",
129 "broken", and "idle" DUTs. Accessor methods allow finding both the
130 list of DUTs in each category, as well as counts of each category.
J. Richard Barnette96db3492015-03-27 17:23:52 -0700131
J. Richard Barnettef6839282015-06-01 16:00:35 -0700132 Performance note: Certain methods in this class are potentially
133 expensive:
134 * `get_working()`
135 * `get_working_list()`
136 * `get_broken()`
137 * `get_broken_list()`
xixuan12ce04f2016-03-10 13:16:30 -0800138 * `get_idle()`
139 * `get_idle_list()`
J. Richard Barnettef6839282015-06-01 16:00:35 -0700140 The first time any one of these methods is called, it causes
141 multiple RPC calls with a relatively expensive set of database
142 queries. However, the results of the queries are cached in the
143 individual `HostJobHistory` objects, so only the first call
144 actually pays the full cost.
J. Richard Barnette96db3492015-03-27 17:23:52 -0700145
xixuan12ce04f2016-03-10 13:16:30 -0800146 Additionally, `get_working_list()`, `get_broken_list()` and
147 `get_idle_list()` cache their return values to avoid recalculating
Richard Barnette5de01eb2017-12-15 09:53:42 -0800148 lists at every call; this caching is separate from the caching of
149 RPC results described above.
J. Richard Barnettef6839282015-06-01 16:00:35 -0700150
Richard Barnette5de01eb2017-12-15 09:53:42 -0800151 This class is deliberately constructed to delay the RPC cost until
152 the accessor methods are called (rather than to query in
J. Richard Barnette96db3492015-03-27 17:23:52 -0700153 `record_host()`) so that it's possible to construct a complete
154 `_LabInventory` without making the expensive queries at creation
Richard Barnette5de01eb2017-12-15 09:53:42 -0800155 time. `_populate_model_counts()`, below, assumes this behavior.
J. Richard Barnette96db3492015-03-27 17:23:52 -0700156
Richard Barnette5de01eb2017-12-15 09:53:42 -0800157 Current usage of this class is that all DUTs are part of a single
158 scheduling pool of DUTs; however, this class make no assumptions
159 about the actual relationship among the DUTs.
J. Richard Barnette96db3492015-03-27 17:23:52 -0700160 """
161
162 def __init__(self):
163 self._histories = []
J. Richard Barnettef6839282015-06-01 16:00:35 -0700164 self._working_list = None
165 self._broken_list = None
xixuan12ce04f2016-03-10 13:16:30 -0800166 self._idle_list = None
J. Richard Barnette96db3492015-03-27 17:23:52 -0700167
168
169 def record_host(self, host_history):
170 """Add one `HostJobHistory` object to the collection.
171
172 @param host_history The `HostJobHistory` object to be
173 remembered.
174
175 """
J. Richard Barnettef6839282015-06-01 16:00:35 -0700176 self._working_list = None
177 self._broken_list = None
xixuan12ce04f2016-03-10 13:16:30 -0800178 self._idle_list = None
J. Richard Barnette96db3492015-03-27 17:23:52 -0700179 self._histories.append(host_history)
180
181
J. Richard Barnettef6839282015-06-01 16:00:35 -0700182 def get_working_list(self):
183 """Return a list of all working DUTs in the pool.
184
185 Filter `self._histories` for histories where the last
186 diagnosis is `WORKING`.
187
188 Cache the result so that we only cacluate it once.
189
190 @return A list of HostJobHistory objects.
191
192 """
193 if self._working_list is None:
194 self._working_list = [h for h in self._histories
195 if h.last_diagnosis()[0] == status_history.WORKING]
196 return self._working_list
197
198
J. Richard Barnette96db3492015-03-27 17:23:52 -0700199 def get_working(self):
J. Richard Barnettef6839282015-06-01 16:00:35 -0700200 """Return the number of working DUTs in the pool."""
201 return len(self.get_working_list())
202
203
204 def get_broken_list(self):
205 """Return a list of all broken DUTs in the pool.
206
207 Filter `self._histories` for histories where the last
xixuan12ce04f2016-03-10 13:16:30 -0800208 diagnosis is `BROKEN`.
J. Richard Barnettef6839282015-06-01 16:00:35 -0700209
210 Cache the result so that we only cacluate it once.
211
212 @return A list of HostJobHistory objects.
213
214 """
215 if self._broken_list is None:
216 self._broken_list = [h for h in self._histories
xixuan12ce04f2016-03-10 13:16:30 -0800217 if h.last_diagnosis()[0] == status_history.BROKEN]
J. Richard Barnettef6839282015-06-01 16:00:35 -0700218 return self._broken_list
J. Richard Barnette96db3492015-03-27 17:23:52 -0700219
220
221 def get_broken(self):
J. Richard Barnettef6839282015-06-01 16:00:35 -0700222 """Return the number of broken DUTs in the pool."""
223 return len(self.get_broken_list())
J. Richard Barnette96db3492015-03-27 17:23:52 -0700224
225
xixuan12ce04f2016-03-10 13:16:30 -0800226 def get_idle_list(self):
227 """Return a list of all idle DUTs in the pool.
228
229 Filter `self._histories` for histories where the last
230 diagnosis is `UNUSED` or `UNKNOWN`.
231
232 Cache the result so that we only cacluate it once.
233
234 @return A list of HostJobHistory objects.
235
236 """
Richard Barnette5de01eb2017-12-15 09:53:42 -0800237 idle_statuses = {status_history.UNUSED, status_history.UNKNOWN}
xixuan12ce04f2016-03-10 13:16:30 -0800238 if self._idle_list is None:
239 self._idle_list = [h for h in self._histories
Richard Barnette5de01eb2017-12-15 09:53:42 -0800240 if h.last_diagnosis()[0] in idle_statuses]
xixuan12ce04f2016-03-10 13:16:30 -0800241 return self._idle_list
242
243
244 def get_idle(self):
245 """Return the number of idle DUTs in the pool."""
246 return len(self.get_idle_list())
247
248
J. Richard Barnette96db3492015-03-27 17:23:52 -0700249 def get_total(self):
J. Richard Barnettef6839282015-06-01 16:00:35 -0700250 """Return the total number of DUTs in the pool."""
J. Richard Barnette96db3492015-03-27 17:23:52 -0700251 return len(self._histories)
252
253
class _PoolSetInventory(object):
    """Maintains a set of `HostJobHistory`s for a set of pools.

    The collection is segregated into disjoint categories of "working",
    "broken", and "idle" DUTs.  Accessor methods allow finding both the
    list of DUTs in each category, as well as counts of each category.
    Accessor queries can be for an individual pool, or against all
    pools.

    Performance note:  This class relies on `_HostSetInventory`.  Public
    methods in this class generally rely on methods of the same name in
    the underlying class, and so will have the same underlying
    performance characteristics.
    """

    def __init__(self, pools):
        # One `_HostSetInventory` per pool name.  Only the pools named
        # here can be recorded or queried later.
        self._histories_by_pool = {
            pool: _HostSetInventory() for pool in pools
        }


    def record_host(self, host_history):
        """Add one `HostJobHistory` object to the collection.

        The history is filed under its `host_pool`; a `KeyError` is
        raised if that pool wasn't passed to the constructor.

        @param host_history The `HostJobHistory` object to be
                            remembered.

        """
        pool = host_history.host_pool
        self._histories_by_pool[pool].record_host(host_history)


    def _count_pool(self, get_pool_count, pool=None):
        """Internal helper to count hosts in a given pool.

        The `get_pool_count` parameter is a function to calculate
        the exact count of interest for the pool.

        @param get_pool_count  Function to return a count from a
                               `_HostSetInventory` object.
        @param pool            The pool to be counted.  If `None`,
                               return the total across all pools.

        @return The count for the selected pool(s).

        """
        if pool is None:
            return sum(get_pool_count(host_set)
                       for host_set in self._histories_by_pool.values())
        else:
            return get_pool_count(self._histories_by_pool[pool])


    def _list_pool(self, get_pool_list, pool=None):
        """Internal helper to list hosts in a given pool.

        The `get_pool_list` parameter is a function to produce the
        exact list of interest for the pool.

        @param get_pool_list  Function to return a list of histories
                              from a `_HostSetInventory` object.
        @param pool           The pool to be listed.  If `None`,
                              return the combined list across all
                              pools.

        @return A list of HostJobHistory objects.

        """
        if pool is None:
            hosts = []
            for host_set in self._histories_by_pool.values():
                hosts.extend(get_pool_list(host_set))
            return hosts
        else:
            return get_pool_list(self._histories_by_pool[pool])


    def get_working_list(self, pool=None):
        """Return a list of all working DUTs in the given pool.

        Go through all HostJobHistory objects in the given pool,
        selecting the ones where the last diagnosis is `WORKING`.

        @param pool  The pool to be listed.  If `None`, return the
                     list across all pools.

        @return A list of HostJobHistory objects.

        """
        return self._list_pool(_HostSetInventory.get_working_list, pool)


    def get_working(self, pool=None):
        """Return the number of working DUTs in a pool.

        @param pool  The pool to be counted.  If `None`, return the
                     total across all pools.

        @return The total number of working DUTs in the selected
                pool(s).
        """
        return self._count_pool(_HostSetInventory.get_working, pool)


    def get_broken_list(self, pool=None):
        """Return a list of all broken DUTs in the given pool.

        Go through all HostJobHistory objects in the given pool,
        selecting the ones where the last diagnosis is `BROKEN`.

        @param pool  The pool to be listed.  If `None`, return the
                     list across all pools.

        @return A list of HostJobHistory objects.

        """
        return self._list_pool(_HostSetInventory.get_broken_list, pool)


    def get_broken(self, pool=None):
        """Return the number of broken DUTs in a pool.

        @param pool  The pool to be counted.  If `None`, return the
                     total across all pools.

        @return The total number of broken DUTs in the selected pool(s).
        """
        return self._count_pool(_HostSetInventory.get_broken, pool)


    def get_idle_list(self, pool=None):
        """Return a list of all idle DUTs in the given pool.

        Go through all HostJobHistory objects in the given pool,
        selecting the ones where the last diagnosis is `UNUSED` or
        `UNKNOWN`.

        @param pool  The pool to be listed.  If `None`, return the
                     list across all pools.

        @return A list of HostJobHistory objects.

        """
        return self._list_pool(_HostSetInventory.get_idle_list, pool)


    def get_idle(self, pool=None):
        """Return the number of idle DUTs in a pool.

        @param pool  The pool to be counted.  If `None`, return the
                     total across all pools.

        @return The total number of idle DUTs in the selected pool(s).
        """
        return self._count_pool(_HostSetInventory.get_idle, pool)


    def get_spares_buffer(self, spare_pool=SPARE_POOL):
        """Return the nominal number of working spares.

        Calculates and returns how many working spares there would
        be in the spares pool if all broken DUTs were in the spares
        pool.  This number may be negative, indicating a shortfall
        in the critical pools.

        @param spare_pool  The pool to be treated as the spares pool.

        @return The total number DUTs in the spares pool, less the total
                number of broken DUTs in all pools.
        """
        return self.get_total(spare_pool) - self.get_broken()


    def get_total(self, pool=None):
        """Return the total number of DUTs in a pool.

        @param pool  The pool to be counted.  If `None`, return the
                     total across all pools.

        @return The total number of DUTs in the selected pool(s).
        """
        return self._count_pool(_HostSetInventory.get_total, pool)
J. Richard Barnette96db3492015-03-27 17:23:52 -0700412
413
def _eligible_host(afehost):
    """Return whether this host is eligible for monitoring.

    To be eligible, a host must carry exactly one 'model' label and
    exactly one 'pool' label, and must carry none of the labels in
    the `_EXCLUDED_LABELS` set.

    @param afehost  The host to be tested for eligibility.
    @return `True` if the host is eligible, `False` otherwise.
    """
    # A DUT that lacks a 'model' or 'pool' label, or has more than
    # one of either, isn't meant to exist in the managed inventory;
    # generally, it indicates an error in the database.  Such errors
    # have nonetheless been seen to occur from time to time.
    #
    # The _LabInventory constructor requires hosts to conform to the
    # label restrictions, and may fail if they don't.  One bad entry
    # shouldn't fail a whole inventory run, so the offending hosts
    # are filtered out here instead.
    model_prefix = constants.Labels.MODEL_PREFIX
    pool_prefix = constants.Labels.POOL_PREFIX
    num_models = sum(1 for label in afehost.labels
                     if label.startswith(model_prefix))
    num_pools = sum(1 for label in afehost.labels
                    if label.startswith(pool_prefix))
    has_excluded = bool(_EXCLUDED_LABELS.intersection(afehost.labels))
    if num_models != 1:
        return False
    if num_pools != 1:
        return False
    return not has_excluded
439
440
class _LabInventory(collections.Mapping):
    """Collection of `HostJobHistory` objects for the Lab's inventory.

    This is a dict-like collection indexed by model.  Indexing returns
    the `_PoolSetInventory` object associated with the model.
    """

    @classmethod
    def create_inventory(cls, afe, start_time, end_time, modellist=None):
        """Return a Lab inventory with specified parameters.

        By default, gathers inventory from `HostJobHistory` objects for
        all DUTs in the `MANAGED_POOLS` list.  If `modellist` is
        supplied, the inventory will be restricted to only the given
        models.

        @param afe          AFE object for constructing the
                            `HostJobHistory` objects.
        @param start_time   Start time for the `HostJobHistory` objects.
        @param end_time     End time for the `HostJobHistory` objects.
        @param modellist    List of models to include.  If empty or
                            `None`, include all available models.
        @return A `_LabInventory` object for the specified models.

        """
        target_pools = MANAGED_POOLS
        label_list = [constants.Labels.POOL_PREFIX + l for l in target_pools]
        afehosts = afe.get_hosts(labels__name__in=label_list)
        if modellist:
            # We're deliberately not checking host eligibility in this
            # code path.  This is a debug path, not used in production;
            # it may be useful to include ineligible hosts here.
            modelhosts = []
            for model in modellist:
                model_label = constants.Labels.MODEL_PREFIX + model
                modelhosts.extend(
                        h for h in afehosts if model_label in h.labels)
            afehosts = modelhosts
        else:
            # `_eligible_host()` is a module-level function, not an
            # attribute of this class, so it must not be looked up
            # through `cls`.
            afehosts = [h for h in afehosts if _eligible_host(h)]
        histories = [
            status_history.HostJobHistory(afe, host, start_time, end_time)
            for host in afehosts
        ]
        return cls(histories, target_pools)


    def __init__(self, histories, pools):
        """Build the inventory from a list of histories.

        @param histories  List of `HostJobHistory` objects to be
                          recorded, each under its `host_model`.
        @param pools      Names of the pools that every per-model
                          `_PoolSetInventory` will track.
        """
        models = {h.host_model for h in histories}
        self._modeldata = {model: _PoolSetInventory(pools) for model in models}
        self._dut_count = len(histories)
        for h in histories:
            self[h.host_model].record_host(h)
        self._boards = {h.host_board for h in histories}


    def __getitem__(self, key):
        return self._modeldata.__getitem__(key)


    def __len__(self):
        return self._modeldata.__len__()


    def __iter__(self):
        return self._modeldata.__iter__()


    def reportable_items(self, spare_pool=SPARE_POOL):
        """Iterate over all items subject to reporting.

        Yields the contents of `self.iteritems()` filtered to include
        only reportable models.  A model is reportable if it has DUTs in
        both `spare_pool` and at least one other pool.

        @param spare_pool  The spare pool to be tested for reporting.
        """
        for model, histories in self.iteritems():
            spares = histories.get_total(spare_pool)
            total = histories.get_total()
            # `spares == total` means every DUT is a spare, i.e. the
            # model has no DUTs in any other pool.
            if spares != 0 and spares != total:
                yield model, histories


    def get_num_duts(self):
        """Return the total number of DUTs in the inventory."""
        return self._dut_count


    def get_num_models(self):
        """Return the total number of models in the inventory."""
        return len(self)


    def get_pool_models(self, pool):
        """Return all models in `pool`.

        @param pool  The pool to be inventoried for models.
        @return The set of models with at least one DUT in `pool`.
        """
        return {m for m, h in self.iteritems() if h.get_total(pool)}


    def get_boards(self):
        """Return the set of boards of all DUTs in the inventory."""
        return self._boards
Prathmesh Prabhua5a0e3d2017-11-09 08:53:53 -0800545
546
def _sort_by_location(inventory_list):
    """Return a list of DUTs, organized by location.

    Take the given list of `HostJobHistory` objects, separate it
    into a list per lab, and sort each lab's list by location.  The
    order of sorting within a lab is
      * By row number within the lab,
      * then by rack number within the row,
      * then by host shelf number within the rack.

    Return a list of the sorted lists.

    DUTs whose hostname doesn't match `_HOSTNAME_PATTERN` have no
    known location, and are left out of the result.

    Implementation note: host locations are compared as
    `(row, rack, host)` tuples of integers, so there's no limit on
    the size of the individual numbers.

    @param inventory_list  List of `HostJobHistory` objects to be
                           sorted.
    @return A list of sorted lists of DUTs.

    """
    lab_lists = {}
    for history in inventory_list:
        location = _HOSTNAME_PATTERN.match(history.host.hostname)
        if location:
            lab = location.group(1)
            key = tuple(int(idx) for idx in location.group(2, 3, 4))
            lab_lists.setdefault(lab, []).append((key, history))
    return_list = []
    for dut_list in lab_lists.values():
        # Sort on the location key only; the history objects
        # themselves are never compared.
        dut_list.sort(key=lambda t: t[0])
        return_list.append([t[1] for t in dut_list])
    return return_list
582
583
def _score_repair_set(buffer_counts, repair_list):
    """Return a numeric score rating a set of DUTs to be repaired.

    `buffer_counts` is a dictionary mapping model names to the size of
    the model's spares buffer.

    `repair_list` is a list of `HostJobHistory` objects for the DUTs to
    be repaired.

    This function calculates the new set of buffer counts that would
    result from the proposed repairs, and scores the new set using two
    numbers:
      * Worst case buffer count for any model (higher is better).  This
        is the more significant number for comparison.
      * Number of models at the worst case (lower is better).  This is
        the less significant number.

    Implementation note:  The score could fail to reflect the intended
    criteria if there are more than 1000 models in the inventory.

    @param buffer_counts  A dictionary mapping models to buffer counts.
                          Must not be empty; an empty dictionary
                          raises `ValueError`.
    @param repair_list    A list of `HostJobHistory` objects for the
                          DUTs to be repaired.
    @return A numeric score.
    """
    _NMODELS = 1000
    # Each repaired DUT adds one to the buffer of its model.  Repairs
    # for models that aren't in `buffer_counts` don't contribute to
    # the score.
    repairs_by_model = collections.Counter(
            h.host_model for h in repair_list)
    # The new list of counts discards the model names, as they don't
    # contribute to the final score.
    new_counts = [count + repairs_by_model[model]
                  for model, count in buffer_counts.items()]
    # Find the worst available spares count, and count how many times
    # that worst case occurs.
    worst_count = min(new_counts)
    num_worst = new_counts.count(worst_count)
    return _NMODELS * worst_count - num_worst
J. Richard Barnettef6839282015-06-01 16:00:35 -0700635
636
def _generate_repair_recommendation(inventory, num_recommend):
    """Return a summary of selected DUTs needing repair.

    Returns a message recommending a list of broken DUTs to be repaired.
    The list of DUTs is selected based on these criteria:
      * No more than `num_recommend` DUTs will be listed.
      * All DUTs must be in the same lab.
      * DUTs should be selected for some degree of physical proximity.
      * DUTs for models with a low spares buffer are more important than
        DUTs with larger buffers.

    The algorithm used will guarantee that at least one DUT from a model
    with the lowest spares buffer will be recommended. If the worst
    spares buffer number is shared by more than one model, the algorithm
    will tend to prefer repair sets that include more of those models
    over sets that cover fewer models.

    If the inventory contains no broken DUTs, the returned message
    holds only the heading and the column titles.

    @param inventory      `_LabInventory` object from which to generate
                          recommendations.
    @param num_recommend  Number of DUTs to recommend for repair.
    @return String with the recommendation message.
    """
    logging.debug('Creating DUT repair recommendations')
    model_buffer_counts = {}
    broken_list = []
    for model, counts in inventory.reportable_items():
        logging.debug('Listing failed DUTs for %s', model)
        if counts.get_broken() != 0:
            model_buffer_counts[model] = counts.get_spares_buffer()
            broken_list.extend(counts.get_broken_list())
    # Start from an empty recommendation (not `None`), so that the
    # report loop further down is safe when nothing is broken.
    recommendation = []
    best_score = None
    # With no broken DUTs there is nothing to sort or score.
    labs = _sort_by_location(broken_list) if broken_list else []
    # N.B. The logic inside this loop may seem complicated, but
    # simplification is hard:
    #   * Calculating an initial recommendation outside of
    #     the loop likely would make things more complicated,
    #     not less.
    #   * It's necessary to calculate an initial lab slice once per
    #     lab _before_ the while loop, in case the number of broken
    #     DUTs in a lab is less than `num_recommend`.
    for lab_duts in labs:
        start = 0
        end = num_recommend
        lab_slice = lab_duts[start : end]
        lab_score = _score_repair_set(model_buffer_counts, lab_slice)
        # Slide a window of `num_recommend` DUTs across the lab and
        # keep the best scoring window.
        while end < len(lab_duts):
            start += 1
            end += 1
            new_slice = lab_duts[start : end]
            new_score = _score_repair_set(model_buffer_counts, new_slice)
            if new_score > lab_score:
                lab_slice = new_slice
                lab_score = new_score
        if best_score is None or lab_score > best_score:
            recommendation = lab_slice
            best_score = lab_score
    # N.B. The trailing space in `line_fmt` is mandatory: Without it,
    # Gmail will parse the URL wrong. Don't ask. If you simply _must_
    # know more, go try it yourself...
    line_fmt = '%-30s %-16s %-6s\n %s '
    message = ['Repair recommendations:\n',
               line_fmt % ('Hostname', 'Model', 'Servo?', 'Logs URL')]
    for h in recommendation:
        servo_name = servo_host.make_servo_hostname(h.host.hostname)
        servo_present = utils.host_is_in_lab_zone(servo_name)
        _, event = h.last_diagnosis()
        line = line_fmt % (
            h.host.hostname, h.host_model,
            'Yes' if servo_present else 'No', event.job_url)
        message.append(line)
    return '\n'.join(message)
708
709
def _generate_model_inventory_message(inventory):
    """Generate the "model inventory" e-mail message.

    The model inventory is a list by model summarizing the number of
    working, broken, and idle DUTs, and the total shortfall or surplus
    of working devices relative to the minimum critical pool
    requirement.

    The report omits models with no DUTs in the spare pool or with no
    DUTs in a critical pool. Only models with at least one broken DUT
    get a row in the per-model table; all reportable models count
    toward the summary totals.

    An inventory with no DUTs at all yields a summary with all counts
    and percentages at zero.

    N.B. For sample output text formatted as users can expect to
    see it in e-mail and log files, refer to the unit tests.

    @param inventory  `_LabInventory` object to be reported on.
    @return String with the inventory message to be sent.
    """
    logging.debug('Creating model inventory')
    nworking = 0
    nbroken = 0
    nidle = 0
    nbroken_models = 0
    ntotal_models = 0
    summaries = []
    column_names = (
        'Model', 'Avail', 'Bad', 'Idle', 'Good', 'Spare', 'Total')
    for model, counts in inventory.reportable_items():
        logging.debug('Counting %2d DUTS for model %s',
                      counts.get_total(), model)
        # Summary elements laid out in the same order as the column
        # headers:
        #     Model Avail Bad  Idle Good Spare Total
        #     e[0]  e[1]  e[2] e[3] e[4] e[5]  e[6]
        element = (model,
                   counts.get_spares_buffer(),
                   counts.get_broken(),
                   counts.get_idle(),
                   counts.get_working(),
                   counts.get_total(SPARE_POOL),
                   counts.get_total())
        if element[2]:
            summaries.append(element)
            nbroken_models += 1
        ntotal_models += 1
        nbroken += element[2]
        nidle += element[3]
        nworking += element[4]
    ntotal = nworking + nbroken + nidle
    # Worst spares buffer first; ties broken by most broken DUTs.
    summaries = sorted(summaries, key=lambda e: (e[1], -e[2]))
    if ntotal:
        broken_percent = int(round(100.0 * nbroken / ntotal))
        idle_percent = int(round(100.0 * nidle / ntotal))
        # Derive the last percentage so the three always sum to 100.
        working_percent = 100 - broken_percent - idle_percent
    else:
        # No DUTs at all: avoid dividing by zero, report 0% everywhere.
        broken_percent = idle_percent = working_percent = 0
    message = ['Summary of DUTs in inventory:',
               '%10s %10s %10s %6s' % ('Bad', 'Idle', 'Good', 'Total'),
               '%5d %3d%% %5d %3d%% %5d %3d%% %6d' % (
                   nbroken, broken_percent,
                   nidle, idle_percent,
                   nworking, working_percent,
                   ntotal),
               '',
               'Models with failures: %d' % nbroken_models,
               'Models in inventory: %d' % ntotal_models,
               '', '',
               'Full model inventory:\n',
               '%-22s %5s %5s %5s %5s %5s %5s' % column_names]
    message.extend(
        ['%-22s %5d %5d %5d %5d %5d %5d' % e for e in summaries])
    return '\n'.join(message)
778
779
# Preamble placed at the top of the "pool inventory" e-mail message.
_POOL_INVENTORY_HEADER = '''\
Notice to Infrastructure deputies: All models shown below are at
less than full strength, please take action to resolve the issues.
Once you're satisfied that failures won't recur, failed DUTs can
be replaced with spares by running `balance_pool`. Detailed
instructions can be found here:
 http://go/cros-manage-duts
'''
788
789
def _generate_pool_inventory_message(inventory):
    """Generate the "pool inventory" e-mail message.

    For each critical pool, the message lists every model that has at
    least one broken or idle DUT in that pool, with its counts of
    broken, idle, and working DUTs and the pool total. Rows are
    ordered by descending number of broken DUTs. A pool with nothing
    to list is reported as being at full strength.

    N.B. For sample output text formatted as users can expect to see it
    in e-mail and log files, refer to the unit tests.

    @param inventory  `_LabInventory` object to be reported on.
    @return String with the inventory message to be sent.
    """
    logging.debug('Creating pool inventory')
    row_fmt = '%-20s %5d %5d %5d %5d'
    lines = [_POOL_INVENTORY_HEADER]
    # The first pool section follows the header directly; every later
    # section is set off by a blank line.
    separator = ''
    for pool in CRITICAL_POOLS:
        lines.append(
            '%sStatus for pool:%s, by model:' % (separator, pool))
        lines.append(
            '%-20s %5s %5s %5s %5s' % (
                'Model', 'Bad', 'Idle', 'Good', 'Total'))
        rows = []
        for model, counts in inventory.iteritems():
            logging.debug('Counting %2d DUTs for %s, %s',
                          counts.get_total(pool), model, pool)
            nbroken = counts.get_broken(pool)
            nidle = counts.get_idle(pool)
            # Models at full strength are not reported.
            if nbroken or nidle:
                rows.append((model, nbroken, nidle,
                             counts.get_working(pool),
                             counts.get_total(pool)))
        if rows:
            rows.sort(key=lambda row: -row[1])
            lines.extend(row_fmt % row for row in rows)
        else:
            lines.append('(All models at full strength)')
        separator = '\n'
    return '\n'.join(lines)
832
833
# Preamble placed ahead of the list of idle hosts in the e-mail.
_IDLE_INVENTORY_HEADER = (
    "Notice to Infrastructure deputies: The hosts shown below haven't\n"
    'run any jobs for at least 24 hours. Please check each host; locked\n'
    'hosts should normally be unlocked; stuck jobs should normally be\n'
    'aborted.\n'
)
840
841
def _generate_idle_inventory_message(inventory):
    """Generate the "idle inventory" e-mail message.

    The idle inventory is a host list with corresponding pool and model,
    where the hosts are idle (`UNKNOWN` or `UNUSED`). Hosts are listed
    pool by pool, for every managed pool.

    N.B. For sample output text format as users can expect to
    see it in e-mail and log files, refer to the unit tests.

    @param inventory  `_LabInventory` object to be reported on.
    @return String with the inventory message to be sent.

    """
    logging.debug('Creating idle inventory')
    row_fmt = '%-30s %-20s %s'
    report = [_IDLE_INVENTORY_HEADER,
              'Idle Host List:',
              row_fmt % ('Hostname', 'Model', 'Pool')]
    idle_rows = []
    for pool in MANAGED_POOLS:
        for model, counts in inventory.iteritems():
            logging.debug('Counting %2d DUTs for %s, %s',
                          counts.get_total(pool), model, pool)
            for dut in counts.get_idle_list(pool):
                idle_rows.append(
                    row_fmt % (dut.host.hostname, model, pool))
    if idle_rows:
        report.extend(idle_rows)
    else:
        report.append('(No idle DUTs)')
    return '\n'.join(report)
871
872
def _send_email(arguments, tag, subject, recipients, body):
    """Send an inventory e-mail message.

    The message is logged in the selected log directory using `tag` for
    the file name.

    If the --debug option was requested, the message is neither logged
    nor sent, but merely printed on stdout.

    Failures to write the log file or to send the e-mail are reported
    through `logging.error()` and do not raise.

    @param arguments   Parsed command-line options.
    @param tag         Tag identifying the inventory for logging
                       purposes.
    @param subject     E-mail Subject: header line.
    @param recipients  E-mail addresses for the To: header line.
    @param body        E-mail message body.
    """
    logging.debug('Generating email: "%s"', subject)
    all_recipients = ', '.join(recipients)
    report_body = '\n'.join([
        'To: %s' % all_recipients,
        'Subject: %s' % subject,
        '', body, ''])
    if arguments.debug:
        # Parenthesized single argument: prints the same text whether
        # `print` is a statement or a function.
        print(report_body)
        return
    filename = os.path.join(arguments.logdir, tag)
    try:
        # `with` guarantees the file is closed even if write() fails.
        with open(filename, 'w') as report_file:
            report_file.write(report_body)
    except EnvironmentError as e:
        logging.error('Failed to write %s: %s', filename, e)
    try:
        gmail_lib.send_email(all_recipients, subject, body)
    except Exception as e:
        # Best effort: a mail failure must not abort the other reports.
        logging.error('Failed to send e-mail to %s: %s',
                      all_recipients, e)
910
911
def _populate_model_counts(inventory):
    """Gather model counts while providing interactive feedback.

    Gathering the status of all individual DUTs in the lab can take
    considerable time (~30 minutes at the time of this writing).
    Normally, we pay that cost by querying as we go. However, with
    the `--debug` option, we expect a human being to be watching the
    progress in real time. So, we force the first (expensive) queries
    to happen up front, and provide simple ASCII output on sys.stdout
    to show a progress bar and results.

    The progress bar shows one character per model: '+' for every
    model whose ordinal ends in 5, the tens digit for every tenth
    model, and '.' for all others.

    @param inventory  `_LabInventory` object from which to gather
                      counts.
    """
    total_broken = 0
    for ordinal, counts in enumerate(inventory.itervalues(), 1):
        last_digit = ordinal % 10
        if last_digit == 5:
            mark = '+'
        elif last_digit == 0:
            mark = '%d' % ((ordinal // 10) % 10)
        else:
            mark = '.'
        sys.stdout.write(mark)
        sys.stdout.flush()
        # This next call is where all the time goes - it forces all of a
        # model's `HostJobHistory` objects to query the database and
        # cache their results.
        total_broken += counts.get_broken()
    sys.stdout.write('\n')
    sys.stdout.write('Found %d broken DUTs\n' % total_broken)
944
945
def _perform_model_inventory(arguments, inventory, timestamp):
    """Perform the model inventory report.

    The model inventory report consists of the following:
      * A list of DUTs that are recommended to be repaired. This list
        is optional, and only appears if the `--recommend` option is
        present.
      * A list of all models that have failed DUTs, with counts
        of working, broken, and spare DUTs, among others.

    The combined text is handed to `_send_email()` for the
    `--model-notify` recipients.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`
    @param inventory  `_LabInventory` object to be reported on.
    @param timestamp  A string used to identify this run's timestamp
                      in logs and email output.
    """
    sections = []
    if arguments.recommend:
        # The recommendation, when requested, leads the message and is
        # set off from the inventory by blank lines.
        sections.append(
            _generate_repair_recommendation(
                inventory, arguments.recommend) + '\n\n\n')
    sections.append(_generate_model_inventory_message(inventory))
    _send_email(arguments,
                'models-%s.txt' % timestamp,
                'DUT model inventory %s' % timestamp,
                arguments.model_notify,
                ''.join(sections))
Richard Barnette3dcbb6a2017-10-23 17:57:50 -0700973
974
def _perform_pool_inventory(arguments, inventory, timestamp):
    """Perform the pool inventory report.

    The pool inventory report consists of the following:
      * A list of all critical pools that have failed DUTs, with counts
        of working, broken, and idle DUTs.
      * A list of all idle DUTs by hostname including the model and
        pool.

    The combined text is handed to `_send_email()` for the
    `--pool-notify` recipients.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`
    @param inventory  `_LabInventory` object to be reported on.
    @param timestamp  A string used to identify this run's timestamp in
                      logs and email output.
    """
    sections = [_generate_pool_inventory_message(inventory),
                _generate_idle_inventory_message(inventory)]
    _send_email(arguments,
                'pools-%s.txt' % timestamp,
                'DUT pool inventory %s' % timestamp,
                arguments.pool_notify,
                '\n\n\n'.join(sections))
997
998
def _dut_in_repair_loop(history):
    """Return whether a DUT's history indicates a repair loop.

    A DUT is considered looping if it runs no tests, and no tasks pass
    other than repair tasks.

    @param history  An instance of `status_history.HostJobHistory` to be
                    scanned for a repair loop. The caller guarantees
                    that this history corresponds to a working DUT.
    @returns Return a true value if the DUT's most recent history
             indicates a repair loop; otherwise a false value (`False`,
             or `None` when the history runs out before enough
             successful repairs were seen).
    """
    # Our caller passes only histories for working DUTs; that means
    # we've already paid the cost of fetching the diagnosis task, and
    # we know that the task was successful. The diagnosis task will be
    # one of the tasks we must scan to find a loop, so if the task isn't
    # a repair task, then our history includes a successful non-repair
    # task, and we're not looping.
    #
    # The for loop below is very expensive, because it must fetch the
    # full history, regardless of how many tasks we examine. At the
    # time of this writing, this check against the diagnosis task
    # reduces the cost of finding loops in the full inventory from hours
    # to minutes.
    _, diagnosis_task = history.last_diagnosis()
    if diagnosis_task.name != 'Repair':
        return False
    successful_repairs = 0
    for task in history:
        # A test ran, so we're not looping.
        if not task.is_special:
            return False
        # A repair failed, so we're not looping.
        if task.diagnosis == status_history.BROKEN:
            return False
        # A non-repair task succeeded, so we're not looping.
        if (task.diagnosis == status_history.WORKING
                and task.name != 'Repair'):
            return False
        # What remains is either a failed non-repair task, or a
        # successful repair; only the latter counts toward a loop.
        if task.name == 'Repair':
            successful_repairs += 1
            if successful_repairs >= _REPAIR_LOOP_THRESHOLD:
                return True
    return None
1043
1044
def _perform_repair_loop_report(arguments, inventory):
    """Scan the inventory for DUTs stuck in a repair loop.

    This routine walks through the given inventory looking for DUTs
    where the most recent history shows that the DUT is regularly
    passing repair tasks, but has not run any tests. Each such DUT
    is logged and reported through a boolean presence metric.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`
    @param inventory  `_LabInventory` object to be reported on.
    """
    loop_presence = metrics.BooleanMetric(
        'chromeos/autotest/inventory/repair_loops',
        'DUTs stuck in repair loops')
    logging.info('Scanning for DUTs in repair loops.')
    for counts in inventory.itervalues():
        for history in counts.get_working_list():
            # Managed DUTs with names that don't match
            # _HOSTNAME_PATTERN shouldn't be possible. However, we
            # don't want arbitrary strings being attached to the
            # 'dut_hostname' field, so for safety, we exclude all
            # anomalies.
            if (_HOSTNAME_PATTERN.match(history.hostname)
                    and _dut_in_repair_loop(history)):
                fields = {
                    'dut_hostname': history.hostname,
                    'model': history.host_model,
                    'pool': history.host_pool,
                }
                logging.info('Looping DUT: %(dut_hostname)s, '
                             'model: %(model)s, pool: %(pool)s',
                             fields)
                loop_presence.set(True, fields=fields)
1077
1078
def _log_startup(arguments, startup_time):
    """Log the start of this inventory run.

    Print various log messages indicating the start of the run. Return
    a string based on `startup_time` that will be used to identify this
    run in log files and e-mail messages.

    @param arguments     Command-line arguments as returned by
                         `ArgumentParser`; consulted to log which
                         reports were requested.
    @param startup_time  A UNIX timestamp marking the moment when
                         this inventory run began.
    @returns A timestamp string that will be used to identify this run
             in logs and email output.
    """
    # Date plus hour, in local time.
    run_id = time.strftime('%Y-%m-%d.%H', time.localtime(startup_time))
    logging.debug('Starting lab inventory for %s', run_id)
    wants_models = arguments.model_notify
    # Recommendations are part of the model report only.
    if wants_models and arguments.recommend:
        logging.debug('Will include repair recommendations')
    if wants_models:
        logging.debug('Will include model inventory')
    if arguments.pool_notify:
        logging.debug('Will include pool inventory')
    return run_id
1101
1102
def _create_inventory(arguments, end_time):
    """Create the `_LabInventory` instance to use for reporting.

    The inventory covers the `arguments.duration` hours leading up to
    `end_time`, restricted to `arguments.modelnames`.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`.
    @param end_time   A UNIX timestamp for the end of the time range
                      to be searched in this inventory run.
    @return The inventory returned by `_LabInventory.create_inventory()`.
    """
    # `--duration` is given in hours; timestamps are in seconds.
    seconds_back = arguments.duration * 60 * 60
    afe = frontend_wrappers.RetryingAFE(server=None)
    inventory = _LabInventory.create_inventory(
        afe, end_time - seconds_back, end_time, arguments.modelnames)
    logging.info('Found %d hosts across %d models',
                 inventory.get_num_duts(),
                 inventory.get_num_models())
    return inventory
1117
1118
def _perform_inventory_reports(arguments):
    """Perform all inventory checks requested on the command line.

    Create the initial inventory and run through the inventory reports
    as called for by the parsed command-line arguments.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`.
    """
    # One instant serves both as the run's identifying timestamp and
    # as the end of the history window that gets searched.
    run_started = time.time()
    timestamp = _log_startup(arguments, run_started)
    inventory = _create_inventory(arguments, run_started)

    # In debug mode, gather the model counts up front, with progress
    # feedback on stdout.
    if arguments.debug:
        _populate_model_counts(inventory)

    # Each report is run only if it was asked for.
    if arguments.model_notify:
        _perform_model_inventory(arguments, inventory, timestamp)

    if arguments.pool_notify:
        _perform_pool_inventory(arguments, inventory, timestamp)

    if arguments.repair_loops:
        _perform_repair_loop_report(arguments, inventory)
1139
1140
def _separate_email_addresses(address_list):
    """Parse a list of comma-separated lists of e-mail addresses.

    Every string is split on commas and each piece is stripped of
    surrounding whitespace. Empty pieces are kept as empty strings.

    @param address_list  A list of strings containing comma
                         separate e-mail addresses.
    @return A list of the individual e-mail addresses.

    """
    return [address.strip()
            for group in address_list
            for address in group.split(',')]
1153
1154
def _verify_arguments(arguments):
    """Validate command-line arguments.

    Join comma separated e-mail addresses for `--model-notify` and
    `--pool-notify` in separate option arguments into a single list.

    For non-debug uses, require that notification be requested for
    at least one report. For debug, if notification isn't specified,
    treat it as "run all the reports."

    The return value indicates success or failure; in the case of
    failure, we also write an error message to stderr.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`; the two notify lists are
                      updated in place.
    @return True if the arguments are semantically good, or False
            if the arguments don't meet requirements.

    """
    arguments.model_notify = _separate_email_addresses(
        arguments.model_notify)
    arguments.pool_notify = _separate_email_addresses(
        arguments.pool_notify)
    if arguments.model_notify or arguments.pool_notify:
        return True
    if arguments.debug:
        # We want to run all the reports. An empty notify list
        # will cause a report to be skipped, so make sure the
        # lists are non-empty.
        arguments.model_notify = ['']
        arguments.pool_notify = ['']
        return True
    sys.stderr.write('Must specify at least one of '
                     '--model-notify or --pool-notify\n')
    return False
J. Richard Barnette96db3492015-03-27 17:23:52 -07001190
1191
def _get_default_logdir(script):
    """Get the default directory for the `--logdir` option.

    The default is the `_LOGDIR` path joined onto the parent of the
    directory that contains this script.

    @param script  Path to this script file.
    @return A path to a directory.

    """
    script_dir = os.path.dirname(os.path.abspath(script))
    return os.path.join(os.path.dirname(script_dir), _LOGDIR)
1205
1206
def _parse_command(argv):
    """Parse the command line arguments.

    Create an argument parser for this command's syntax, parse the
    command line, and validate the result with `_verify_arguments()`.

    @param argv  Standard command line argument vector; argv[0] is
                 assumed to be the command name.
    @return Result returned by ArgumentParser.parse_args(), or `None`
            if the parsed arguments fail validation.

    """
    parser = argparse.ArgumentParser(
        prog=argv[0],
        description='Gather and report lab inventory statistics')
    add_option = parser.add_argument
    add_option('-d', '--duration', type=int,
               default=_DEFAULT_DURATION, metavar='HOURS',
               help='number of hours back to search for status'
                    ' (default: %d)' % _DEFAULT_DURATION)
    # The notify options may be repeated; each occurrence may itself
    # hold a comma separated list.
    add_option('--model-notify', action='append',
               default=[], metavar='ADDRESS',
               help='Generate model inventory message, '
                    'and send it to the given e-mail address(es)')
    add_option('--pool-notify', action='append',
               default=[], metavar='ADDRESS',
               help='Generate pool inventory message, '
                    'and send it to the given address(es)')
    add_option('-r', '--recommend', type=int, default=None,
               help=('Specify how many DUTs should be '
                     'recommended for repair (default: no '
                     'recommendation)'))
    add_option('--repair-loops', action='store_true',
               help='Check for devices stuck in repair loops.')
    add_option('--debug-metrics', action='store_true',
               help='Include debug information about the metrics '
                    'that would be reported ')
    add_option('--debug', action='store_true',
               help='Print e-mail messages on stdout '
                    'without sending them.')
    add_option('--logdir', default=_get_default_logdir(argv[0]),
               help='Directory where logs will be written.')
    add_option('modelnames', nargs='*',
               metavar='MODEL',
               help='names of models to report on '
                    '(default: all models)')
    arguments = parser.parse_args(argv[1:])
    if _verify_arguments(arguments):
        return arguments
    return None
1256
1257
def _configure_logging(arguments):
    """Configure the `logging` module for our needs.

    How we log depends on whether the `--debug` option was provided on
    the command line.
      * Without the option, we configure the logging to capture all
        potentially relevant events in a log file.  The log file is
        configured to rotate once a week on Friday evening, preserving
        ~3 months worth of history.
      * With the option, we expect stdout to contain other
        human-readable output (including the contents of the e-mail
        messages), so we restrict the output to INFO level.

    For convenience, when `--debug` is on, the logging format has
    no adornments, so that a call like `logging.info(msg)` simply writes
    `msg` to stdout, plus a trailing newline.

    Every handler already installed on the root logger is removed, so
    that on return the handler created here is the only one.

    @param arguments  Command-line arguments as returned by
                      `ArgumentParser`
    """
    root_logger = logging.getLogger()
    if arguments.debug:
        root_logger.setLevel(logging.INFO)
        handler = logging.StreamHandler(sys.stdout)
        handler.setFormatter(logging.Formatter())
    else:
        if not os.path.exists(arguments.logdir):
            os.mkdir(arguments.logdir)
        root_logger.setLevel(logging.DEBUG)
        logfile = os.path.join(arguments.logdir, _LOGFILE)
        # 'W4' rotates weekly on Friday (Monday is 'W0'); 13 weekly
        # backups is roughly three months of history.
        handler = logging.handlers.TimedRotatingFileHandler(
                logfile, when='W4', backupCount=13)
        formatter = logging.Formatter(_LOG_FORMAT,
                                      time_utils.TIME_FMT)
        handler.setFormatter(formatter)
    # TODO(jrbarnette) This is gross.  Importing client.bin.utils
    # implicitly imported logging_config, which calls
    # logging.basicConfig() *at module level*.  That gives us an
    # extra logging handler that we don't want.  So, clear out all
    # the handlers here.
    #
    # Iterate over a copy of the list:  removeHandler() mutates
    # `root_logger.handlers`, and removing from a list while iterating
    # over it skips every other element, leaving stale handlers behind.
    for h in list(root_logger.handlers):
        root_logger.removeHandler(h)
    root_logger.addHandler(handler)
J. Richard Barnette96db3492015-03-27 17:23:52 -07001301
1302
def main(argv):
    """Parse the command line and run the inventory reports.

    If the command line cannot be parsed the process exits with
    status 1.  Otherwise logging is configured and the reports are
    run.  Unless `--debug` was given without `--debug-metrics`, the
    reports run with ts_mon global state set up, and the metrics are
    flushed explicitly afterwards.

    A `KeyboardInterrupt` is silently ignored; any other exception
    raised while reporting is logged rather than propagated.

    @param argv  Command line arguments, including `sys.argv[0]`.
    """
    arguments = _parse_command(argv)
    if not arguments:
        sys.exit(1)
    _configure_logging(arguments)
    try:
        if not arguments.debug_metrics and arguments.debug:
            # Plain debug run:  no metrics are set up at all.
            _perform_inventory_reports(arguments)
        else:
            if arguments.debug_metrics:
                debug_file = '/dev/null'
            else:
                debug_file = None
            ts_mon_state = site_utils.SetupTsMonGlobalState(
                    'repair_loops', short_lived=True,
                    debug_file=debug_file,
                    auto_flush=False)
            with ts_mon_state:
                _perform_inventory_reports(arguments)
                # auto_flush is off, so flush explicitly.
                metrics.Flush()
    except KeyboardInterrupt:
        pass
    except EnvironmentError as e:
        logging.exception('Unexpected OS error: %s', e)
    except Exception as e:
        logging.exception('Unexpected exception: %s', e)
1329
1330
def get_inventory(afe):
    """Create a lab inventory covering the most recent 24 hours.

    @param afe  Passed through unchanged as the first argument to
                `_LabInventory.create_inventory()`.
    @return The result of `_LabInventory.create_inventory()` for the
            interval that ends at the current time and starts 24
            hours earlier.
    """
    seconds_per_day = 24 * 60 * 60
    now = int(time.time())
    return _LabInventory.create_inventory(
            afe, now - seconds_per_day, now)
1335
1336
def get_managed_boards(afe):
    """Return the boards reported by a fresh inventory.

    @param afe  Passed through unchanged to `get_inventory()`.
    @return The result of calling `get_boards()` on the inventory
            returned by `get_inventory()`.
    """
    inventory = get_inventory(afe)
    return inventory.get_boards()
J. Richard Barnetteaa868932015-10-23 13:28:59 -07001339
1340
# Run the reports only when executed as a script.  `main()` expects
# the full argument vector, including the program name in argv[0].
if __name__ == '__main__':
    main(sys.argv)