blob: c9ca4edd7e186205292e9e61112dd17e350eae33 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2015 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Adjust pool balances to cover DUT shortfalls.

This command takes all broken DUTs in a specific pool for specific
boards and swaps them with working DUTs taken from a selected pool
of spares.  The command is meant primarily for replacing broken DUTs
in critical pools like BVT or CQ, but it can also be used to adjust
pool sizes, or to create or remove pools.

usage:  balance_pool.py [ options ] POOL BOARD [ BOARD ... ]

positional arguments:
  POOL                  Name of the pool to balance
  BOARD                 Names of boards to balance

optional arguments:
  -h, --help            show this help message and exit
  -t COUNT, --total COUNT
                        Set the number of DUTs in the pool to the specified
                        count for every BOARD
  -a COUNT, --grow COUNT
                        Add the specified number of DUTs to the pool for every
                        BOARD
  -d COUNT, --shrink COUNT
                        Remove the specified number of DUTs from the pool for
                        every BOARD
  -s POOL, --spare POOL
                        Pool from which to draw replacement spares (default:
                        pool:suites)
  -n, --dry-run         Report actions to take in the form of shell commands

The list above is not exhaustive; run the command with --help for the
remaining options (--verbose, --max-broken, --force-rebalance,
--all-boards, --max-broken-boards).

The command attempts to remove all broken DUTs from the target POOL
for every BOARD, and replace them with enough working DUTs taken
from the spare pool to bring the strength of POOL to the requested
total COUNT.

If no COUNT options are supplied (i.e. there are no --total, --grow,
or --shrink options), the command will maintain the current totals of
DUTs for every BOARD in the target POOL.

If not enough working spares are available, broken DUTs may be left
in the pool to keep the pool at the target COUNT.

When reducing pool size, working DUTs will be returned after broken
DUTs, if it's necessary to achieve the target COUNT.

"""
53
54
55import argparse
56import sys
57import time
58
59import common
60from autotest_lib.server import frontend
Aviv Keshet7ee95862016-08-30 15:18:27 -070061from autotest_lib.server.lib import status_history
Kevin Chengcf0ad2b2016-04-19 14:51:39 -070062from autotest_lib.site_utils import lab_inventory
J. Richard Barnette91d56812015-04-21 10:22:31 -070063from autotest_lib.site_utils.suite_scheduler import constants
64
David James2a3cb542015-05-05 17:13:43 -070065from chromite.lib import parallel
66
J. Richard Barnette91d56812015-04-21 10:22:31 -070067
# Prefix prepended to a bare pool name to form its AFE pool label; also
# used to recognize which of a host's labels are pool labels.
_POOL_PREFIX = constants.Labels.POOL_PREFIX
# This is the ratio of all boards we should calculate the default max number of
# broken boards against.  It seemed like the best choice that was neither too
# strict nor lax.
_MAX_BROKEN_BOARDS_DEFAULT_RATIO = 3.0 / 8.0

# Pseudo pool name accepted on the command line; it selects every pool in
# `lab_inventory.CRITICAL_POOLS` instead of a single named pool.
_ALL_CRITICAL_POOLS = 'all_critical_pools'
# Default value of the --spare option.
_SPARE_DEFAULT = lab_inventory.SPARE_POOL
J. Richard Barnette91d56812015-04-21 10:22:31 -070076
77
78def _log_message(message, *args):
79 """Log a message with optional format arguments to stdout.
80
81 This function logs a single line to stdout, with formatting
82 if necessary, and without adornments.
83
84 If `*args` are supplied, the message will be formatted using
85 the arguments.
86
87 @param message Message to be logged, possibly after formatting.
88 @param args Format arguments. If empty, the message is logged
89 without formatting.
90
91 """
92 if args:
93 message = message % args
94 sys.stdout.write('%s\n' % message)
95
96
def _log_info(dry_run, message, *args):
    """Log information in a dry-run dependent fashion.

    This function logs a single line to stdout, with formatting
    if necessary.  When logging for a dry run, the message is
    printed as a shell comment, rather than as unadorned text.

    If `*args` are supplied, the message will be formatted using
    the arguments.

    @param dry_run  Whether the logging is for a dry run.  If true,
                    the message is prefixed with '# '.
    @param message  Message to be logged, possibly after formatting.
    @param args     Format arguments.  If empty, the message is logged
                    without formatting.

    """
    # The prefix is added before formatting; it contains no '%', so
    # the format arguments still apply to the original message only.
    if dry_run:
        message = '# ' + message
    _log_message(message, *args)
115
116
117def _log_error(message, *args):
118 """Log an error to stderr, with optional format arguments.
119
120 This function logs a single line to stderr, prefixed to indicate
121 that it is an error message.
122
123 If `*args` are supplied, the message will be formatted using
124 the arguments.
125
126 @param message Message to be logged, possibly after formatting.
127 @param args Format arguments. If empty, the message is logged
128 without formatting.
129
130 """
131 if args:
132 message = message % args
133 sys.stderr.write('ERROR: %s\n' % message)
134
135
136class _DUTPool(object):
137 """Information about a pool of DUTs for a given board.
138
139 This class collects information about all DUTs for a given
140 board and pool pair, and divides them into three categories:
141 + Working - the DUT is working for testing, and not locked.
142 + Broken - the DUT is unable to run tests, or it is locked.
143 + Ineligible - the DUT is not available to be removed from
144 this pool. The DUT may be either working or broken.
145
146 DUTs with more than one pool: label are ineligible for exchange
147 during balancing. This is done for the sake of chameleon hosts,
148 which must always be assigned to pool:suites. These DUTs are
149 always marked with pool:chameleon to prevent their reassignment.
150
151 TODO(jrbarnette): The use of `pool:chamelon` (instead of just
152 the `chameleon` label is a hack that should be eliminated.
153
154 _DUTPool instances are used to track both main pools that need
155 to be resupplied with working DUTs and spare pools that supply
156 those DUTs.
157
158 @property board Name of the board associated with
159 this pool of DUTs.
160 @property pool Name of the pool associated with
161 this pool of DUTs.
David James750c0382015-05-06 19:30:46 -0700162 @property working_hosts The list of this pool's working
J. Richard Barnette91d56812015-04-21 10:22:31 -0700163 DUTs.
David James750c0382015-05-06 19:30:46 -0700164 @property broken_hosts The list of this pool's broken
J. Richard Barnette91d56812015-04-21 10:22:31 -0700165 DUTs.
David James750c0382015-05-06 19:30:46 -0700166 @property ineligible_hosts The list of this pool's ineligible DUTs.
167 @property labels A list of labels that identify a DUT
J. Richard Barnette91d56812015-04-21 10:22:31 -0700168 as part of this pool.
David James750c0382015-05-06 19:30:46 -0700169 @property total_hosts The total number of hosts in pool.
J. Richard Barnette91d56812015-04-21 10:22:31 -0700170
171 """
172
Richard Barnette07303cb2016-04-15 16:56:16 -0700173 def __init__(self, afe, board, pool, start_time, end_time):
J. Richard Barnette91d56812015-04-21 10:22:31 -0700174 self.board = board
175 self.pool = pool
David James750c0382015-05-06 19:30:46 -0700176 self.working_hosts = []
177 self.broken_hosts = []
178 self.ineligible_hosts = []
Richard Barnette07303cb2016-04-15 16:56:16 -0700179 self.total_hosts = self._get_hosts(afe, start_time, end_time)
180 self._labels = [_POOL_PREFIX + self.pool]
J. Richard Barnette91d56812015-04-21 10:22:31 -0700181
182
Richard Barnette07303cb2016-04-15 16:56:16 -0700183 def _get_hosts(self, afe, start_time, end_time):
J. Richard Barnette91d56812015-04-21 10:22:31 -0700184 all_histories = (
185 status_history.HostJobHistory.get_multiple_histories(
186 afe, start_time, end_time,
187 board=self.board, pool=self.pool))
188 for h in all_histories:
189 host = h.host
190 host_pools = [l for l in host.labels
191 if l.startswith(_POOL_PREFIX)]
192 if len(host_pools) != 1:
David James750c0382015-05-06 19:30:46 -0700193 self.ineligible_hosts.append(host)
J. Richard Barnette91d56812015-04-21 10:22:31 -0700194 else:
195 diag = h.last_diagnosis()[0]
196 if (diag == status_history.WORKING and
197 not host.locked):
David James750c0382015-05-06 19:30:46 -0700198 self.working_hosts.append(host)
J. Richard Barnette91d56812015-04-21 10:22:31 -0700199 else:
David James750c0382015-05-06 19:30:46 -0700200 self.broken_hosts.append(host)
J. Richard Barnette91d56812015-04-21 10:22:31 -0700201 return len(all_histories)
202
203
204 @property
205 def pool_labels(self):
206 """Return the AFE labels that identify this pool.
207
208 The returned labels are the labels that must be removed
209 to remove a DUT from the pool, or added to add a DUT.
210
211 @return A list of AFE labels suitable for AFE.add_labels()
212 or AFE.remove_labels().
213
214 """
Richard Barnette07303cb2016-04-15 16:56:16 -0700215 return self._labels
J. Richard Barnette91d56812015-04-21 10:22:31 -0700216
David James750c0382015-05-06 19:30:46 -0700217 def calculate_spares_needed(self, target_total):
J. Richard Barnette91d56812015-04-21 10:22:31 -0700218 """Calculate and log the spares needed to achieve a target.
219
220 Return how many working spares are needed to achieve the
David James750c0382015-05-06 19:30:46 -0700221 given `target_total` with all DUTs working.
J. Richard Barnette91d56812015-04-21 10:22:31 -0700222
223 The spares count may be positive or negative. Positive
224 values indicate spares are needed to replace broken DUTs in
225 order to reach the target; negative numbers indicate that
226 no spares are needed, and that a corresponding number of
227 working devices can be returned.
228
229 If the new target total would require returning ineligible
230 DUTs, an error is logged, and the target total is adjusted
231 so that those DUTs are not exchanged.
232
J. Richard Barnette91d56812015-04-21 10:22:31 -0700233 @param target_total The new target pool size.
234
235 @return The number of spares needed.
236
237 """
David James750c0382015-05-06 19:30:46 -0700238 num_ineligible = len(self.ineligible_hosts)
J. Richard Barnette91d56812015-04-21 10:22:31 -0700239 if target_total < num_ineligible:
240 _log_error('%s %s pool: Target of %d is below '
241 'minimum of %d DUTs.',
242 self.board, self.pool,
243 target_total, num_ineligible)
244 _log_error('Adjusting target to %d DUTs.', num_ineligible)
245 target_total = num_ineligible
David James750c0382015-05-06 19:30:46 -0700246 adjustment = target_total - self.total_hosts
247 return len(self.broken_hosts) + adjustment
J. Richard Barnette91d56812015-04-21 10:22:31 -0700248
David James750c0382015-05-06 19:30:46 -0700249 def allocate_surplus(self, num_broken):
250 """Allocate a list DUTs that can returned as surplus.
J. Richard Barnette91d56812015-04-21 10:22:31 -0700251
252 Return a list of devices that can be returned in order to
253 reduce this pool's supply. Broken DUTs will be preferred
David James750c0382015-05-06 19:30:46 -0700254 over working ones.
J. Richard Barnette91d56812015-04-21 10:22:31 -0700255
256 The `num_broken` parameter indicates the number of broken
257 DUTs to be left in the pool. If this number exceeds the
258 number of broken DUTs actually in the pool, the returned
259 list will be empty. If this number is negative, it
260 indicates a number of working DUTs to be returned in
261 addition to all broken ones.
262
J. Richard Barnette91d56812015-04-21 10:22:31 -0700263 @param num_broken Total number of broken DUTs to be left in
264 this pool.
265
266 @return A list of DUTs to be returned as surplus.
267
268 """
269 if num_broken >= 0:
David James750c0382015-05-06 19:30:46 -0700270 surplus = self.broken_hosts[num_broken:]
J. Richard Barnette91d56812015-04-21 10:22:31 -0700271 return surplus
272 else:
David James750c0382015-05-06 19:30:46 -0700273 return (self.broken_hosts +
274 self.working_hosts[:-num_broken])
J. Richard Barnette91d56812015-04-21 10:22:31 -0700275
276
277def _exchange_labels(dry_run, hosts, target_pool, spare_pool):
278 """Reassign a list of DUTs from one pool to another.
279
280 For all the given hosts, remove all labels associated with
David James750c0382015-05-06 19:30:46 -0700281 `spare_pool`, and add the labels for `target_pool`.
J. Richard Barnette91d56812015-04-21 10:22:31 -0700282
283 If `dry_run` is true, perform no changes, but log the `atest`
284 commands needed to accomplish the necessary label changes.
285
286 @param dry_run Whether the logging is for a dry run or
287 for actual execution.
288 @param hosts List of DUTs (AFE hosts) to be reassigned.
289 @param target_pool The `_DUTPool` object from which the hosts
290 are drawn.
291 @param spare_pool The `_DUTPool` object to which the hosts
292 will be added.
293
294 """
295 if not hosts:
296 return
297 _log_info(dry_run, 'Transferring %d DUTs from %s to %s.',
298 len(hosts), spare_pool.pool, target_pool.pool)
299 additions = target_pool.pool_labels
300 removals = spare_pool.pool_labels
301 for host in hosts:
302 if not dry_run:
303 _log_message('Updating host: %s.', host.hostname)
Richard Barnette07303cb2016-04-15 16:56:16 -0700304 host.remove_labels(removals)
305 host.add_labels(additions)
J. Richard Barnette91d56812015-04-21 10:22:31 -0700306 else:
307 _log_message('atest label remove -m %s %s',
308 host.hostname, ' '.join(removals))
309 _log_message('atest label add -m %s %s',
310 host.hostname, ' '.join(additions))
311
312
def _balance_board(arguments, afe, board, pool, start_time, end_time):
    """Balance one board as requested by command line arguments.

    Builds `_DUTPool` objects for the spare pool and the main pool,
    works out the target size from --total/--grow/--shrink, and then
    exchanges pool labels so that broken DUTs leave the main pool and
    working spares enter it.  Nothing is exchanged if the main pool
    has more than `arguments.max_broken` broken DUTs, unless
    `arguments.force_rebalance` is set.

    @param arguments     Parsed command line arguments.
    @param afe           AFE object to be used for the changes.
    @param board         Board to be balanced.
    @param pool          Pool of the board to be balanced.
    @param start_time    Start time for HostJobHistory objects in
                         the DUT pools.
    @param end_time      End time for HostJobHistory objects in the
                         DUT pools.

    """
    spare_pool = _DUTPool(afe, board, arguments.spare,
                          start_time, end_time)
    main_pool = _DUTPool(afe, board, pool,
                         start_time, end_time)

    # With no COUNT option, the target is the pool's current size.
    target_total = main_pool.total_hosts
    if arguments.total is not None:
        target_total = arguments.total
    elif arguments.grow:
        target_total += arguments.grow
    elif arguments.shrink:
        target_total -= arguments.shrink

    spares_needed = main_pool.calculate_spares_needed(target_total)
    if spares_needed > 0:
        spare_duts = spare_pool.working_hosts[:spares_needed]
        # Number of broken DUTs that must stay for lack of spares.
        shortfall = spares_needed - len(spare_duts)
    else:
        spare_duts = []
        # Zero or negative: the magnitude is the number of working
        # DUTs to give back in addition to all broken ones.
        shortfall = spares_needed

    surplus_duts = main_pool.allocate_surplus(shortfall)

    if spares_needed or surplus_duts or arguments.verbose:
        dry_run = arguments.dry_run
        _log_message('')

        _log_info(dry_run, 'Balancing %s %s pool:', board, main_pool.pool)
        _log_info(dry_run,
                  'Total %d DUTs, %d working, %d broken, %d reserved.',
                  main_pool.total_hosts, len(main_pool.working_hosts),
                  len(main_pool.broken_hosts),
                  len(main_pool.ineligible_hosts))

        if spares_needed > 0:
            add_msg = 'grow pool by %d DUTs' % spares_needed
        elif spares_needed < 0:
            add_msg = 'shrink pool by %d DUTs' % -spares_needed
        else:
            add_msg = 'no change to pool size'
        _log_info(dry_run, 'Target is %d working DUTs; %s.',
                  target_total, add_msg)

        # The count reported here comes from the spare pool, so name
        # the spare pool (not the pool being balanced) in the message.
        _log_info(dry_run,
                  '%s %s pool has %d spares available.',
                  board, spare_pool.pool, len(spare_pool.working_hosts))

        if spares_needed > len(spare_duts):
            _log_error('Not enough spares: need %d, only have %d.',
                       spares_needed, len(spare_duts))
        elif shortfall >= 0:
            _log_info(dry_run,
                      '%s %s pool will return %d broken DUTs, '
                      'leaving %d still in the pool.',
                      board, main_pool.pool,
                      len(surplus_duts),
                      len(main_pool.broken_hosts) - len(surplus_duts))
        else:
            _log_info(dry_run,
                      '%s %s pool will return %d surplus DUTs, '
                      'including %d working DUTs.',
                      board, main_pool.pool,
                      len(main_pool.broken_hosts) - shortfall,
                      -shortfall)

    # Safety valve: many broken DUTs on one board may indicate a bug
    # that is bricking devices, so refuse to feed it more spares.
    if (len(main_pool.broken_hosts) > arguments.max_broken and
            not arguments.force_rebalance):
        _log_error('%s %s pool: Refusing to act on pool with %d broken DUTs.',
                   board, main_pool.pool, len(main_pool.broken_hosts))
        _log_error('Please investigate this board to see if there is a bug ')
        _log_error('that is bricking devices. Once you have finished your ')
        _log_error('investigation, you can force a rebalance with ')
        _log_error('--force-rebalance')
        return

    if not spare_duts and not surplus_duts:
        if arguments.verbose:
            _log_info(arguments.dry_run, 'No exchange required.')
        return

    # Surplus DUTs move main -> spare; spare DUTs move spare -> main.
    _exchange_labels(arguments.dry_run, surplus_duts,
                     spare_pool, main_pool)
    _exchange_labels(arguments.dry_run, spare_duts,
                     main_pool, spare_pool)
411
412
Kevin Chengcf0ad2b2016-04-19 14:51:39 -0700413def _too_many_broken_boards(inventory, pool, arguments):
414 """
415 Get the inventory of boards and check if too many boards are broken.
416
417 @param inventory: inventory object to determine board status inventory.
418 @param pool: The pool to check on for the board.
419 @param arguments Parsed command line arguments.
420
421 @return True if the number of boards with 1 or more broken duts exceed
422 max_broken_boards, False otherwise.
423 """
424 # Let's check if we even need to check for this max_broken_boards.
425 if arguments.force_rebalance or arguments.max_broken_boards == 0:
426 return False
427
428 # Let's get the number of broken duts for the specified pool and
429 # check that it's less than arguments.max_broken_boards. Or if
430 # it's not specified, calculate the default number of max broken
431 # boards based on the total number of boards per pool.
432 # TODO(kevcheng): Revisit to see if there's a better way to
433 # calculate the default max_broken_boards.
434 max_broken_boards = arguments.max_broken_boards
435 if max_broken_boards is None:
436 total_num_boards = len(inventory.get_managed_boards(pool=pool))
437 max_broken_boards = int(_MAX_BROKEN_BOARDS_DEFAULT_RATIO *
438 total_num_boards)
439 _log_info(arguments.dry_run,
440 'Default max broken boards calculated to be %d for '
441 '%s pool',
442 max_broken_boards, pool)
443
444
445 broken_boards = [board for board, counts in inventory.items()
446 if counts.get_broken(pool) != 0]
447 broken_boards.sort()
448 num_of_broken_boards = len(broken_boards)
449 # TODO(kevcheng): Track which boards have broken duts, we can limit the
450 # number of boards we go through in the main loop with this knowledge.
451 _log_message('There are %d boards in the %s pool with at least 1 '
452 'broken DUT (max threshold %d)', num_of_broken_boards,
453 pool, max_broken_boards)
454 for broken_board in broken_boards:
455 _log_message(broken_board)
456 return num_of_broken_boards > max_broken_boards
457
458
def _parse_command(argv):
    """Parse the command line arguments.

    Create an argument parser for this command's syntax, parse the
    command line, and return the result of the `ArgumentParser`
    `parse_args()` method.

    After parsing, the combination of arguments is validated;
    invalid combinations are reported through `parser.error()`.

    @param argv Standard command line argument vector; `argv[0]` is
                assumed to be the command name.

    @return Result returned by `ArgumentParser.parse_args()`.

    """
    parser = argparse.ArgumentParser(
            prog=argv[0],
            description='Balance pool shortages from spares on reserve')

    # At most one of --total, --grow and --shrink may be given.
    count_group = parser.add_mutually_exclusive_group()
    count_group.add_argument('-t', '--total', type=int,
                             metavar='COUNT', default=None,
                             help='Set the number of DUTs in the '
                                  'pool to the specified count for '
                                  'every BOARD')
    count_group.add_argument('-a', '--grow', type=int,
                             metavar='COUNT', default=None,
                             help='Add the specified number of DUTs '
                                  'to the pool for every BOARD')
    count_group.add_argument('-d', '--shrink', type=int,
                             metavar='COUNT', default=None,
                             help='Remove the specified number of DUTs '
                                  'from the pool for every BOARD')

    parser.add_argument('-s', '--spare', default=_SPARE_DEFAULT,
                        metavar='POOL',
                        help='Pool from which to draw replacement '
                             'spares (default: pool:%s)' % _SPARE_DEFAULT)
    parser.add_argument('-n', '--dry-run', action='store_true',
                        help='Report actions to take in the form of '
                             'shell commands')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Print more detail about calculations for debug '
                             'purposes.')

    parser.add_argument('-m', '--max-broken', default=2, type=int,
                        metavar='COUNT',
                        help='Only rebalance a pool if it has at most '
                             'COUNT broken DUTs.')
    parser.add_argument('-f', '--force-rebalance', action='store_true',
                        help='Forcefully rebalance all DUTs in a pool, even '
                             'if it has a large number of broken DUTs. '
                             'Before doing this, please investigate whether '
                             'there is a bug that is bricking devices in the '
                             'lab.')

    parser.add_argument('--all-boards', action='store_true',
                        help='Rebalance all managed boards.  This will do a '
                             'very expensive check to see how many boards have '
                             'at least one broken DUT. To bypass that check, '
                             'set --max-broken-boards to 0.')
    # The help text refers to COUNT, so the metavar must say COUNT too.
    parser.add_argument('--max-broken-boards',
                        default=None, type=int, metavar='COUNT',
                        help='Only rebalance all boards if number of boards '
                             'with broken DUTs in the specified pool '
                             'is at most COUNT.')

    parser.add_argument('pool',
                        metavar='POOL',
                        help='Name of the pool to balance.  Use %s to balance '
                             'all critical pools' % _ALL_CRITICAL_POOLS)
    parser.add_argument('boards', nargs='*',
                        metavar='BOARD',
                        help='Names of boards to balance.')

    arguments = parser.parse_args(argv[1:])

    # Error-check arguments.
    if not arguments.boards and not arguments.all_boards:
        parser.error('No boards specified. To balance all boards, use '
                     '--all-boards')
    if arguments.boards and arguments.all_boards:
        parser.error('Cannot specify boards with --all-boards.')
    if (arguments.pool == _ALL_CRITICAL_POOLS and
            arguments.spare != _SPARE_DEFAULT):
        parser.error('Cannot specify a --spare pool other than %s when '
                     'balancing all critical pools.' % _SPARE_DEFAULT)
    return arguments
545
546
def main(argv):
    """Standard main routine.

    Parses the command line, builds the list of (index, board, pool)
    tasks to balance, and runs them through
    `parallel.RunTasksInProcessPool()`.  Host histories are examined
    over the 24 hours ending at the time of the call.

    @param argv  Command line arguments including `sys.argv[0]`.

    """
    def balancer(i, board, pool):
        """Balance the specified board.

        @param i The index of the board.
        @param board The board name.
        @param pool The pool to rebalance for the board.
        """
        if i > 0:
            _log_message('')
        _balance_board(arguments, afe, board, pool, start_time, end_time)

    arguments = _parse_command(argv)
    end_time = time.time()
    start_time = end_time - 24 * 60 * 60
    afe = frontend.AFE(server=None)
    boards = arguments.boards
    pools = (lab_inventory.CRITICAL_POOLS
             if arguments.pool == _ALL_CRITICAL_POOLS
             else [arguments.pool])
    # Each entry is the argument tuple for one balancer() call; the
    # index keeps counting across pools rather than restarting at 0.
    board_info = []
    if arguments.all_boards:
        inventory = lab_inventory.get_inventory(afe)
        for pool in pools:
            if _too_many_broken_boards(inventory, pool, arguments):
                _log_error('Refusing to balance all boards for %s pool, '
                           'too many boards with at least 1 broken DUT '
                           'detected.', pool)
            else:
                boards_in_pool = inventory.get_managed_boards(pool=pool)
                board_info.extend(
                        [(i, board, pool) for i, board in
                         enumerate(boards_in_pool, len(board_info))])
    else:
        # We have specified boards with a specified pool, setup the args to
        # the balancer properly.
        for pool in pools:
            board_info.extend(
                    [(i, board, pool) for i, board in
                     enumerate(boards, len(board_info))])
    try:
        parallel.RunTasksInProcessPool(balancer, board_info, processes=8)
    except KeyboardInterrupt:
        # Deliberate: an interrupt from the user ends the run quietly
        # instead of printing a traceback.
        pass
596
597
# Run as a script: hand the full argument vector (including the
# command name) to main().
if __name__ == '__main__':
    main(sys.argv)