| #!/usr/bin/env python |
| # Copyright 2015 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Create e-mail reports of the Lab's DUT inventory. |
| |
| Gathers a list of all DUTs of interest in the Lab, segregated by |
| board and pool, and determines whether each DUT is working or |
| broken. Then, send one or more e-mail reports summarizing the |
| status to e-mail addresses provided on the command line. |
| |
| usage: lab_inventory.py [ options ] [ board ... ] |
| |
| Options: |
| --duration / -d <hours> |
| How far back in time to search job history to determine DUT |
| status. |
| |
| --board-notify <address>[,<address>] |
| Send the "board status" e-mail to all the specified e-mail |
| addresses. |
| |
| --pool-notify <address>[,<address>] |
| Send the "pool status" e-mail to all the specified e-mail |
| addresses. |
| |
| --recommend <number> |
| When generating the "board status" e-mail, included a list of |
| <number> specific DUTs to be recommended for repair. |
| |
| --logdir <directory> |
| Log progress and actions in a file under this directory. Text |
| of any e-mail sent will also be logged in a timestamped file in |
| this directory. |
| |
| --print |
| Suppress all logging and sending e-mail. Instead, write the |
| output that would be generated onto stdout. |
| |
| <board> arguments: |
| With no arguments, gathers the status for all boards in the lab. |
| With one or more named boards on the command line, restricts |
| reporting to just those boards. |
| |
| """ |
| |
| |
| import argparse |
| import logging |
| import logging.handlers |
| import os |
| import re |
| import sys |
| import time |
| |
| import common |
| from autotest_lib.client.bin import utils |
| from autotest_lib.client.common_lib import time_utils |
| from autotest_lib.server import frontend |
| from autotest_lib.server.hosts import servo_host |
| from autotest_lib.site_utils import gmail_lib |
| from autotest_lib.site_utils import status_history |
| from autotest_lib.site_utils.suite_scheduler import constants |
| |
| |
| # The pools in the Lab that are actually of interest. |
| # |
| # These are general purpose pools of DUTs that are considered |
| # identical for purposes of testing. That is, a device in one of |
| # these pools can be shifted to another pool at will for purposes |
| # of supplying test demand. |
| # |
| # Devices in these pools are not allowed to have special-purpose |
| # attachments, or to be part of in any kind of custom fixture. |
| # Devices in these pools are also required to reside in areas |
| # managed by the Platforms team (i.e. at the time of this writing, |
| # only in "Atlantis" or "Destiny"). |
| # |
| # _CRITICAL_POOLS - Pools that must be kept fully supplied in order |
| # to guarantee timely completion of tests from builders. |
| # _SPARE_POOL - A low priority pool that is allowed to provide |
| # spares to replace broken devices in the critical pools. |
| # _MANAGED_POOLS - The set of all the general purpose pools |
| # monitored by this script. |
| |
| _CRITICAL_POOLS = ['bvt', 'cq'] |
| _SPARE_POOL = 'suites' |
| _MANAGED_POOLS = _CRITICAL_POOLS + [_SPARE_POOL] |
| |
| # _DEFAULT_DURATION: |
| # Default value used for the --duration command line option. |
| # Specifies how far back in time to search in order to determine |
| # DUT status. |
| |
| _DEFAULT_DURATION = 24 |
| |
| # _LOGDIR: |
| # Relative path used in the calculation of the default setting |
| # for the --logdir option. The full path path is relative to |
| # the root of the autotest directory, as determined from |
| # sys.argv[0]. |
| # _LOGFILE: |
| # Basename of a file to which general log information will be |
| # written. |
| # _LOG_FORMAT: |
| # Format string for log messages. |
| |
| _LOGDIR = os.path.join('logs', 'dut-data') |
| _LOGFILE = 'lab-inventory.log' |
| _LOG_FORMAT = '%(asctime)s | %(levelname)-10s | %(message)s' |
| |
| # Pattern describing location-based host names in the Chrome OS test |
| # labs. Each DUT hostname designates the DUT's location: |
| # * A lab (room) that's physically separated from other labs |
| # (i.e. there's a door). |
| # * A row (or aisle) of DUTs within the lab. |
| # * A vertical rack of shelves on the row. |
| # * A specific host on one shelf of the rack. |
| |
| _HOSTNAME_PATTERN = re.compile( |
| r'(chromeos\d+)-row(\d+)-rack(\d+)-host(\d+)') |
| |
| |
| class _PoolCounts(object): |
| """Maintains a set of `HostJobHistory` objects for a pool. |
| |
| The collected history objects are nominally all part of a single |
| scheduling pool of DUTs. The collection maintains a list of |
| working DUTs, a list of broken DUTs, and a list of all DUTs. |
| |
| Performance note: Certain methods in this class are potentially |
| expensive: |
| * `get_working()` |
| * `get_working_list()` |
| * `get_broken()` |
| * `get_broken_list()` |
| The first time any one of these methods is called, it causes |
| multiple RPC calls with a relatively expensive set of database |
| queries. However, the results of the queries are cached in the |
| individual `HostJobHistory` objects, so only the first call |
| actually pays the full cost. |
| |
| Additionally, `get_working_list()` and `get_broken_list()` both |
| cache their return values to avoid recalculating lists at every |
| call; this caching is separate from the caching of RPC results |
| described above. |
| |
| This class is deliberately constructed to delay the RPC cost |
| until the accessor methods are called (rather than to query in |
| `record_host()`) so that it's possible to construct a complete |
| `_LabInventory` without making the expensive queries at creation |
| time. `_populate_board_counts()`, below, assumes this behavior. |
| |
| """ |
| |
| def __init__(self): |
| self._histories = [] |
| self._working_list = None |
| self._broken_list = None |
| |
| |
| def record_host(self, host_history): |
| """Add one `HostJobHistory` object to the collection. |
| |
| @param host_history The `HostJobHistory` object to be |
| remembered. |
| |
| """ |
| self._working_list = None |
| self._broken_list = None |
| self._histories.append(host_history) |
| |
| |
| def get_working_list(self): |
| """Return a list of all working DUTs in the pool. |
| |
| Filter `self._histories` for histories where the last |
| diagnosis is `WORKING`. |
| |
| Cache the result so that we only cacluate it once. |
| |
| @return A list of HostJobHistory objects. |
| |
| """ |
| if self._working_list is None: |
| self._working_list = [h for h in self._histories |
| if h.last_diagnosis()[0] == status_history.WORKING] |
| return self._working_list |
| |
| |
| def get_working(self): |
| """Return the number of working DUTs in the pool.""" |
| return len(self.get_working_list()) |
| |
| |
| def get_broken_list(self): |
| """Return a list of all broken DUTs in the pool. |
| |
| Filter `self._histories` for histories where the last |
| diagnosis is not `WORKING`. |
| |
| Cache the result so that we only cacluate it once. |
| |
| @return A list of HostJobHistory objects. |
| |
| """ |
| if self._broken_list is None: |
| self._broken_list = [h for h in self._histories |
| if h.last_diagnosis()[0] != status_history.WORKING] |
| return self._broken_list |
| |
| |
| def get_broken(self): |
| """Return the number of broken DUTs in the pool.""" |
| return len(self.get_broken_list()) |
| |
| |
| def get_total(self): |
| """Return the total number of DUTs in the pool.""" |
| return len(self._histories) |
| |
| |
| class _BoardCounts(object): |
| """Maintains a set of `HostJobHistory` objects for a board. |
| |
| The collected history objects are nominally all of the same |
| board. The collection maintains a count of working DUTs, a |
| count of broken DUTs, and a total count. The counts can be |
| obtained either for a single pool, or as a total across all |
| pools. |
| |
| DUTs in the collection must be assigned to one of the pools |
| in `_MANAGED_POOLS`. |
| |
| The `get_working()` and `get_broken()` methods rely on the |
| methods of the same name in _PoolCounts, so the performance |
| note in _PoolCounts applies here as well. |
| |
| """ |
| |
| def __init__(self): |
| self._pools = { |
| pool: _PoolCounts() for pool in _MANAGED_POOLS |
| } |
| |
| def record_host(self, host_history): |
| """Add one `HostJobHistory` object to the collection. |
| |
| @param host_history The `HostJobHistory` object to be |
| remembered. |
| |
| """ |
| pool = host_history.host_pool |
| self._pools[pool].record_host(host_history) |
| |
| |
| def _count_pool(self, get_pool_count, pool=None): |
| """Internal helper to count hosts in a given pool. |
| |
| The `get_pool_count` parameter is a function to calculate |
| the exact count of interest for the pool. |
| |
| @param get_pool_count Function to return a count from a |
| _PoolCount object. |
| @param pool The pool to be counted. If `None`, |
| return the total across all pools. |
| |
| """ |
| if pool is None: |
| return sum([get_pool_count(counts) |
| for counts in self._pools.values()]) |
| else: |
| return get_pool_count(self._pools[pool]) |
| |
| |
| def get_working_list(self): |
| """Return a list of all working DUTs for the board. |
| |
| Go through all HostJobHistory objects in the board's pools, |
| selecting the ones where the last diagnosis is `WORKING`. |
| |
| @return A list of HostJobHistory objects. |
| |
| """ |
| l = [] |
| for p in self._pools.values(): |
| l.extend(p.get_working_list()) |
| return l |
| |
| |
| def get_working(self, pool=None): |
| """Return the number of working DUTs in a pool. |
| |
| @param pool The pool to be counted. If `None`, return the |
| total across all pools. |
| |
| """ |
| return self._count_pool(_PoolCounts.get_working, pool) |
| |
| |
| def get_broken_list(self): |
| """Return a list of all broken DUTs for the board. |
| |
| Go through all HostJobHistory objects in the board's pools, |
| selecting the ones where the last diagnosis is not |
| `WORKING`. |
| |
| @return A list of HostJobHistory objects. |
| |
| """ |
| l = [] |
| for p in self._pools.values(): |
| l.extend(p.get_broken_list()) |
| return l |
| |
| |
| def get_broken(self, pool=None): |
| """Return the number of broken DUTs in a pool. |
| |
| @param pool The pool to be counted. If `None`, return the |
| total across all pools. |
| |
| """ |
| return self._count_pool(_PoolCounts.get_broken, pool) |
| |
| |
| def get_total(self, pool=None): |
| """Return the total number of DUTs in a pool. |
| |
| @param pool The pool to be counted. If `None`, return the |
| total across all pools. |
| |
| """ |
| return self._count_pool(_PoolCounts.get_total, pool) |
| |
| |
| class _LabInventory(dict): |
| """Collection of `HostJobHistory` objects for the Lab's inventory. |
| |
| The collection is indexed by board. Indexing returns the |
| _BoardCounts object associated with the board. |
| |
| The collection is also iterable. The iterator returns all the |
| boards in the inventory, in unspecified order. |
| |
| """ |
| |
| @classmethod |
| def create_inventory(cls, afe, start_time, end_time, boardlist=[]): |
| """Return a Lab inventory with specified parameters. |
| |
| By default, gathers inventory from `HostJobHistory` objects |
| for all DUTs in the `_MANAGED_POOLS` list. If `boardlist` |
| is supplied, the inventory will be restricted to only the |
| given boards. |
| |
| @param afe AFE object for constructing the |
| `HostJobHistory` objects. |
| @param start_time Start time for the `HostJobHistory` |
| objects. |
| @param end_time End time for the `HostJobHistory` |
| objects. |
| @param boardlist List of boards to include. If empty, |
| include all available boards. |
| @return A `_LabInventory` object for the specified boards. |
| |
| """ |
| label_list = [constants.Labels.POOL_PREFIX + l |
| for l in _MANAGED_POOLS] |
| afehosts = afe.get_hosts(labels__name__in=label_list) |
| if boardlist: |
| boardhosts = [] |
| for board in boardlist: |
| board_label = constants.Labels.BOARD_PREFIX + board |
| host_list = [h for h in afehosts |
| if board_label in h.labels] |
| boardhosts.extend(host_list) |
| afehosts = boardhosts |
| create = lambda host: ( |
| status_history.HostJobHistory(afe, host, |
| start_time, end_time)) |
| return cls([create(host) for host in afehosts]) |
| |
| |
| def __init__(self, histories): |
| # N.B. The query that finds our hosts is restricted to those |
| # with a valid pool: label, but doesn't check for a valid |
| # board: label. In some (insufficiently) rare cases, the |
| # AFE hosts table has been known to (incorrectly) have DUTs |
| # with a pool: but no board: label. We explicitly exclude |
| # those here. |
| histories = [h for h in histories |
| if h.host_board is not None] |
| boards = set([h.host_board for h in histories]) |
| initval = { board: _BoardCounts() for board in boards } |
| super(_LabInventory, self).__init__(initval) |
| self._dut_count = len(histories) |
| self._board_counts = None |
| for h in histories: |
| self[h.host_board].record_host(h) |
| |
| |
| def get_working_list(self): |
| """Return a list of all working DUTs in the inventory. |
| |
| Go through all HostJobHistory objects in the inventory, |
| selecting the ones where the last diagnosis is `WORKING`. |
| |
| @return A list of HostJobHistory objects. |
| |
| """ |
| l = [] |
| for counts in self.values(): |
| l.extend(counts.get_working_list()) |
| return l |
| |
| |
| def get_broken_list(self): |
| """Return a list of all broken DUTs in the inventory. |
| |
| Go through all HostJobHistory objects in the inventory, |
| selecting the ones where the last diagnosis is not |
| `WORKING`. |
| |
| @return A list of HostJobHistory objects. |
| |
| """ |
| l = [] |
| for counts in self.values(): |
| l.extend(counts.get_broken_list()) |
| return l |
| |
| |
| def get_board_counts(self): |
| """Calculate a summary of board counts. |
| |
| The summary is a list of tuples. The tuple elements, in |
| order, are: |
| * board - The name of the board associated with the |
| counts. |
| * buffer - The buffer of working spares (the total number |
| of spares, less the number of broken DUTs). |
| * broken - The number of broken DUTs. |
| * working - The number of working DUTs. |
| * spares - The number of DUTs in the spares pool. |
| * total - The the total number of DUTs. |
| |
| Boards with no DUTs in the spares pool or no DUTs in a |
| critical pool will be excluded from the listed counts. |
| |
| The ordering of the boards is unspecified. |
| |
| @param inventory The inventory to be summarized. |
| @return A list of tuples with board data. |
| |
| """ |
| if self._board_counts is None: |
| self._board_counts = [] |
| for board, counts in self.items(): |
| logging.debug('Counting inventory for %s', board) |
| spares = counts.get_total(_SPARE_POOL) |
| total = counts.get_total() |
| if spares == 0 or spares == total: |
| continue |
| working = counts.get_working() |
| broken = counts.get_broken() |
| spare_buffer = spares - broken |
| element = (board, spare_buffer, broken, working, |
| spares, total) |
| self._board_counts.append(element) |
| return self._board_counts |
| |
| |
| def get_num_duts(self): |
| """Return the total number of DUTs in the inventory.""" |
| return self._dut_count |
| |
| |
| def get_num_boards(self): |
| """Return the total number of boards in the inventory.""" |
| return len(self) |
| |
| |
| def _sort_by_location(inventory_list): |
| """Return a list of DUTs, organized by location. |
| |
| Take the given list of `HostJobHistory` objects, separate it |
| into a list per lab, and sort each lab's list by location. The |
| order of sorting within a lab is |
| * By row number within the lab, |
| * then by rack number within the row, |
| * then by host shelf number within the rack. |
| |
| Return a list of the sorted lists. |
| |
| Implementation note: host locations are sorted by converting |
| each location into a base 100 number. If row, rack or |
| host numbers exceed the range [0..99], then sorting will |
| break down. |
| |
| @return A list of sorted lists of DUTs. |
| |
| """ |
| BASE = 100 |
| lab_lists = {} |
| for history in inventory_list: |
| location = _HOSTNAME_PATTERN.match(history.host.hostname) |
| if location: |
| lab = location.group(1) |
| key = 0 |
| for idx in location.group(2, 3, 4): |
| key = BASE * key + int(idx) |
| lab_lists.setdefault(lab, []).append((key, history)) |
| return_list = [] |
| for dut_list in lab_lists.values(): |
| dut_list.sort(key=lambda t: t[0]) |
| return_list.append([t[1] for t in dut_list]) |
| return return_list |
| |
| |
| def _score_repair_set(buffer_counts, repair_list): |
| """Return a numeric score rating a set of DUTs to be repaired. |
| |
| `buffer_counts` is a dictionary mapping board names to the |
| size of the board's spares buffer. |
| |
| `repair_list` is a list of DUTs to be repaired. |
| |
| This function calculates the new set of buffer counts that would |
| result from the proposed repairs, and scores the new set using |
| two numbers: |
| * Worst case buffer count for any board (higher is better). |
| This is the more siginficant number for comparison. |
| * Number of boards at the worst case (lower is better). This |
| is the less significant number. |
| |
| Implementation note: The score could fail to reflect the |
| intended criteria if there are more than 1000 boards in the |
| inventory. |
| |
| @param spare_counts A dictionary mapping boards to buffer counts. |
| @param repair_list A list of boards to be repaired. |
| @return A numeric score. |
| |
| """ |
| # Go through `buffer_counts`, and create a list of new counts |
| # that records the buffer count for each board after repair. |
| # The new list of counts discards the board names, as they don't |
| # contribute to the final score. |
| _NBOARDS = 1000 |
| repair_inventory = _LabInventory(repair_list) |
| new_counts = [] |
| for b, c in buffer_counts.items(): |
| if b in repair_inventory: |
| newcount = repair_inventory[b].get_total() |
| else: |
| newcount = 0 |
| new_counts.append(c + newcount) |
| # Go through the new list of counts. Find the worst available |
| # spares count, and count how many times that worst case occurs. |
| worst_count = new_counts[0] |
| num_worst = 1 |
| for c in new_counts[1:]: |
| if c == worst_count: |
| num_worst += 1 |
| elif c < worst_count: |
| worst_count = c |
| num_worst = 1 |
| # Return the calculated score |
| return _NBOARDS * worst_count - num_worst |
| |
| |
| def _generate_repair_recommendation(inventory, num_recommend): |
| """Return a summary of selected DUTs needing repair. |
| |
| Returns a message recommending a list of broken DUTs to be |
| repaired. The list of DUTs is selected based on these |
| criteria: |
| * No more than `num_recommend` DUTs will be listed. |
| * All DUTs must be in the same lab. |
| * DUTs should be selected for some degree of physical |
| proximity. |
| * DUTs for boards with a low spares buffer are more important |
| than DUTs with larger buffers. |
| |
| The algorithm used will guarantee that at least one DUT from a |
| board with the smallest spares buffer will be recommended. If |
| the worst spares buffer number is shared by more than one board, |
| the algorithm will tend to prefer repair sets that include more |
| of those boards over sets that cover fewer boards. |
| |
| @param inventory Inventory for generating recommendations. |
| @param num_recommend Number of DUTs to recommend for repair. |
| |
| """ |
| logging.debug('Creating DUT repair recommendations') |
| board_counts = inventory.get_board_counts() |
| # t[0] - board name |
| # t[1] - size of spares buffer |
| # t[2] - number of broken devices |
| board_buffer_counts = {t[0]: t[1] for t in board_counts |
| if t[2] != 0} |
| recommendation = None |
| best_score = None |
| # N.B. The logic of this loop may seem complicated, but |
| # simplification is hard: |
| # * Calculating an initial recommendation outside of |
| # the loop likely would make things more complicated, |
| # not less. |
| # * It's necessary to calculate an initial lab slice once per |
| # lab _before_ the while loop, in case the number of broken |
| # DUTs in a lab is less than `num_recommend`. |
| for lab_duts in _sort_by_location(inventory.get_broken_list()): |
| start = 0 |
| end = num_recommend |
| lab_slice = lab_duts[start : end] |
| lab_score = _score_repair_set(board_buffer_counts, |
| lab_slice) |
| while end < len(lab_duts): |
| start += 1 |
| end += 1 |
| new_slice = lab_duts[start : end] |
| new_score = _score_repair_set(board_buffer_counts, |
| new_slice) |
| if new_score > lab_score: |
| lab_slice = new_slice |
| lab_score = new_score |
| if recommendation is None or lab_score > best_score: |
| recommendation = lab_slice |
| best_score = lab_score |
| message = ['Repair recommendations:\n', |
| '%-30s %-16s %s' % ( |
| 'Hostname', 'Board', 'Servo instructions')] |
| for h in recommendation: |
| servo_name = servo_host.make_servo_hostname(h.host.hostname) |
| if utils.host_is_in_lab_zone(servo_name): |
| servo_message = 'Repair servo first' |
| else: |
| servo_message = 'No servo present' |
| line = '%-30s %-16s %s' % ( |
| h.host.hostname, h.host_board, servo_message) |
| message.append(line) |
| return '\n'.join(message) |
| |
| |
| def _generate_board_inventory_message(inventory): |
| """Generate the "board inventory" e-mail message. |
| |
| The board inventory is a list by board summarizing the number |
| of working and broken DUTs, and the total shortfall or surplus |
| of working devices relative to the minimum critical pool |
| requirement. |
| |
| The report omits boards with no DUTs in the spare pool or with |
| no DUTs in a critical pool. |
| |
| N.B. For sample output text formattted as users can expect to |
| see it in e-mail and log files, refer to the unit tests. |
| |
| @param inventory _LabInventory object with the inventory to |
| be reported on. |
| @return String with the inventory message to be sent. |
| |
| """ |
| logging.debug('Creating board inventory') |
| message = ['Full board inventory:\n', |
| '%-22s %5s %5s %5s %5s %5s' % ( |
| 'Board', 'Avail', 'Bad', 'Good', |
| 'Spare', 'Total')] |
| data_list = inventory.get_board_counts() |
| data_list = sorted(sorted(data_list, key=lambda t: -t[2]), |
| key=lambda t: t[1]) |
| message.extend( |
| ['%-22s %5d %5d %5d %5d %5d' % t for t in data_list]) |
| return '\n'.join(message) |
| |
| |
| _POOL_INVENTORY_HEADER = '''\ |
| Notice to Infrastructure deputies: If any boards are shown at |
| less than full strength, please take action to resolve the issues. |
| Once you're satisified that failures won't recur, failed DUTs can |
| be replaced with spares by running `balance_pool`. Detailed |
| instructions can be found here: |
| http://go/cros-manage-duts |
| ''' |
| |
| |
| def _generate_pool_inventory_message(inventory): |
| """Generate the "pool inventory" e-mail message. |
| |
| The pool inventory is a list by pool and board summarizing the |
| number of working and broken DUTs in the pool. Only boards with |
| at least one broken DUT are included in the list. |
| |
| N.B. For sample output text formattted as users can expect to |
| see it in e-mail and log files, refer to the unit tests. |
| |
| @param inventory _LabInventory object with the inventory to |
| be reported on. |
| @return String with the inventory message to be sent. |
| |
| """ |
| logging.debug('Creating pool inventory') |
| message = [_POOL_INVENTORY_HEADER] |
| newline = '' |
| for pool in _CRITICAL_POOLS: |
| message.append( |
| '%sStatus for pool:%s, by board:' % (newline, pool)) |
| message.append( |
| '%-20s %5s %5s %5s' % ( |
| 'Board', 'Bad', 'Good', 'Total')) |
| data_list = [] |
| for board, counts in inventory.items(): |
| logging.debug('Counting inventory for %s, %s', |
| board, pool) |
| broken = counts.get_broken(pool) |
| if broken == 0: |
| continue |
| working = counts.get_working(pool) |
| total = counts.get_total(pool) |
| data_list.append((board, broken, working, total)) |
| if data_list: |
| data_list = sorted(data_list, key=lambda d: -d[1]) |
| message.extend( |
| ['%-20s %5d %5d %5d' % t for t in data_list]) |
| else: |
| message.append('(All boards at full strength)') |
| newline = '\n' |
| return '\n'.join(message) |
| |
| |
| def _send_email(arguments, tag, subject, recipients, body): |
| """Send an inventory e-mail message. |
| |
| The message is logged in the selected log directory using `tag` |
| for the file name. |
| |
| If the --print option was requested, the message is neither |
| logged nor sent, but merely printed on stdout. |
| |
| @param arguments Parsed command-line options. |
| @param tag Tag identifying the inventory for logging |
| purposes. |
| @param subject E-mail Subject: header line. |
| @param recipients E-mail addresses for the To: header line. |
| @param body E-mail message body. |
| |
| """ |
| logging.debug('Generating email: "%s"', subject) |
| all_recipients = ', '.join(recipients) |
| report_body = '\n'.join([ |
| 'To: %s' % all_recipients, |
| 'Subject: %s' % subject, |
| '', body, '']) |
| if arguments.print_: |
| print report_body |
| else: |
| filename = os.path.join(arguments.logdir, tag) |
| try: |
| report_file = open(filename, 'w') |
| report_file.write(report_body) |
| report_file.close() |
| except EnvironmentError as e: |
| logging.error('Failed to write %s: %s', filename, e) |
| try: |
| gmail_lib.send_email(all_recipients, subject, body) |
| except Exception as e: |
| logging.error('Failed to send e-mail to %s: %s', |
| all_recipients, e) |
| |
| |
| def _separate_email_addresses(address_list): |
| """Parse a list of comma-separated lists of e-mail addresses. |
| |
| @param address_list A list of strings containing comma |
| separate e-mail addresses. |
| @return A list of the individual e-mail addresses. |
| |
| """ |
| newlist = [] |
| for arg in address_list: |
| newlist.extend([email.strip() for email in arg.split(',')]) |
| return newlist |
| |
| |
| def _verify_arguments(arguments): |
| """Validate command-line arguments. |
| |
| Join comma separated e-mail addresses for `--board-notify` and |
| `--pool-notify` in separate option arguments into a single list. |
| |
| @param arguments Command-line arguments as returned by |
| `ArgumentParser` |
| |
| """ |
| arguments.board_notify = _separate_email_addresses( |
| arguments.board_notify) |
| arguments.pool_notify = _separate_email_addresses( |
| arguments.pool_notify) |
| |
| |
| def _get_logdir(script): |
| """Get the default directory for the `--logdir` option. |
| |
| The default log directory is based on the parent directory |
| containing this script. |
| |
| @param script Path to this script file. |
| @return A path to a directory. |
| |
| """ |
| basedir = os.path.dirname(os.path.abspath(script)) |
| basedir = os.path.dirname(basedir) |
| return os.path.join(basedir, _LOGDIR) |
| |
| |
| def _parse_command(argv): |
| """Parse the command line arguments. |
| |
| Create an argument parser for this command's syntax, parse the |
| command line, and return the result of the ArgumentParser |
| parse_args() method. |
| |
| @param argv Standard command line argument vector; argv[0] is |
| assumed to be the command name. |
| @return Result returned by ArgumentParser.parse_args(). |
| |
| """ |
| parser = argparse.ArgumentParser( |
| prog=argv[0], |
| description='Gather and report lab inventory statistics') |
| parser.add_argument('-d', '--duration', type=int, |
| default=_DEFAULT_DURATION, metavar='HOURS', |
| help='number of hours back to search for status' |
| ' (default: %d)' % _DEFAULT_DURATION) |
| parser.add_argument('--board-notify', action='append', |
| default=[], metavar='ADDRESS', |
| help='Generate board inventory message, ' |
| 'and send it to the given e-mail address(es)') |
| parser.add_argument('--pool-notify', action='append', |
| default=[], metavar='ADDRESS', |
| help='Generate pool inventory message, ' |
| 'and send it to the given address(es)') |
| parser.add_argument('-r', '--recommend', type=int, default=None, |
| help=('Specify how many DUTs should be ' |
| 'recommended for repair (default: no ' |
| 'recommendation)')) |
| parser.add_argument('--print', dest='print_', action='store_true', |
| help='Print e-mail messages on stdout ' |
| 'without sending them.') |
| parser.add_argument('--logdir', default=_get_logdir(argv[0]), |
| help='Directory where logs will be written.') |
| parser.add_argument('boardnames', nargs='*', |
| metavar='BOARD', |
| help='names of boards to report on ' |
| '(default: all boards)') |
| arguments = parser.parse_args(argv[1:]) |
| _verify_arguments(arguments) |
| return arguments |
| |
| |
| def _configure_logging(arguments): |
| """Configure the `logging` module for our needs. |
| |
| How we log depends on whether the `--print` option was |
| provided on the command line. Without the option, we log all |
| messages at DEBUG level or above, and write them to a file in |
| the directory specified by the `--logdir` option. With the |
| option, we write log messages to stdout; messages below INFO |
| level are discarded. |
| |
| The log file is configured to rotate once a week on Friday |
| evening, preserving ~3 months worth of history. |
| |
| @param arguments Command-line arguments as returned by |
| `ArgumentParser` |
| |
| """ |
| root_logger = logging.getLogger() |
| if arguments.print_: |
| root_logger.setLevel(logging.INFO) |
| handler = logging.StreamHandler(sys.stdout) |
| handler.setFormatter(logging.Formatter()) |
| else: |
| root_logger.setLevel(logging.DEBUG) |
| logfile = os.path.join(arguments.logdir, _LOGFILE) |
| handler = logging.handlers.TimedRotatingFileHandler( |
| logfile, when='W4', backupCount=13) |
| formatter = logging.Formatter(_LOG_FORMAT, |
| time_utils.TIME_FMT) |
| handler.setFormatter(formatter) |
| # TODO(jrbarnette) This is gross. Importing client.bin.utils |
| # implicitly imported logging_config, which calls |
| # logging.basicConfig() *at module level*. That gives us an |
| # extra logging handler that we don't want. So, clear out all |
| # the handlers here. |
| for h in root_logger.handlers: |
| root_logger.removeHandler(h) |
| root_logger.addHandler(handler) |
| |
| |
| def _populate_board_counts(inventory): |
| """Gather board counts while providing interactive feedback. |
| |
| Gathering the status of all individual DUTs in the lab can take |
| considerable time (~30 minutes at the time of this writing). |
| |
| Normally, we pay that cost by querying as we go. However, with |
| the `--print` option, a human being may be watching the |
| progress. So, we force the first (expensive) queries to happen |
| up front, and provide a small ASCII progress bar to give an |
| indicator of how many boards have been processed. |
| |
| @param inventory _LabInventory object with the inventory to |
| be gathered. |
| |
| """ |
| n = 0 |
| total_broken = 0 |
| for counts in inventory.values(): |
| n += 1 |
| if n % 10 == 5: |
| c = '+' |
| elif n % 10 == 0: |
| c = '%d' % ((n / 10) % 10) |
| else: |
| c = '.' |
| sys.stdout.write(c) |
| sys.stdout.flush() |
| # This next call is where all the time goes - it forces all |
| # of a board's HostJobHistory objects to query the database |
| # and cache their results. |
| total_broken += counts.get_broken() |
| sys.stdout.write('\n') |
| sys.stdout.write('Found %d broken DUTs\n' % total_broken) |
| |
| |
| def main(argv): |
| """Standard main routine. |
| @param argv Command line arguments including `sys.argv[0]`. |
| """ |
| arguments = _parse_command(argv) |
| _configure_logging(arguments) |
| try: |
| end_time = int(time.time()) |
| start_time = end_time - arguments.duration * 60 * 60 |
| timestamp = time.strftime('%Y-%m-%d.%H', |
| time.localtime(end_time)) |
| logging.debug('Starting lab inventory for %s', timestamp) |
| if arguments.board_notify: |
| if arguments.recommend: |
| logging.debug('Will include repair recommendations') |
| logging.debug('Will include board inventory') |
| if arguments.pool_notify: |
| logging.debug('Will include pool inventory') |
| |
| afe = frontend.AFE(server=None) |
| inventory = _LabInventory.create_inventory( |
| afe, start_time, end_time, arguments.boardnames) |
| logging.info('Found %d hosts across %d boards', |
| inventory.get_num_duts(), |
| inventory.get_num_boards()) |
| |
| if arguments.print_: |
| _populate_board_counts(inventory) |
| |
| if arguments.print_ or arguments.board_notify: |
| if arguments.recommend: |
| recommend_message = _generate_repair_recommendation( |
| inventory, arguments.recommend) + '\n\n\n' |
| else: |
| recommend_message = '' |
| board_message = _generate_board_inventory_message(inventory) |
| full_message = recommend_message + board_message |
| _send_email(arguments, |
| 'boards-%s.txt' % timestamp, |
| 'DUT board inventory %s' % timestamp, |
| arguments.board_notify, |
| full_message) |
| |
| if arguments.print_ or arguments.pool_notify: |
| _send_email(arguments, |
| 'pools-%s.txt' % timestamp, |
| 'DUT pool inventory %s' % timestamp, |
| arguments.pool_notify, |
| _generate_pool_inventory_message(inventory)) |
| except KeyboardInterrupt: |
| pass |
| except EnvironmentError as e: |
| logging.exception('Unexpected OS error: %s', e) |
| except Exception as e: |
| logging.exception('Unexpected exception: %s', e) |
| |
| |
| if __name__ == '__main__': |
| main(sys.argv) |