Alex Miller | b0b2d25 | 2014-06-25 17:17:01 -0700 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | |
Don Garrett | 4003636 | 2014-12-08 15:52:44 -0800 | [diff] [blame] | 3 | from __future__ import print_function |
| 4 | |
| 5 | import argparse |
Dan Shi | fb12d14 | 2015-06-09 23:30:11 -0700 | [diff] [blame] | 6 | import multiprocessing.pool |
J. Richard Barnette | 868cf64 | 2014-07-21 16:34:38 -0700 | [diff] [blame] | 7 | import subprocess |
| 8 | import sys |
Dan Shi | 94c310d | 2016-03-18 11:27:38 -0700 | [diff] [blame] | 9 | import time |
Alex Miller | b0b2d25 | 2014-06-25 17:17:01 -0700 | [diff] [blame] | 10 | |
Don Garrett | 4003636 | 2014-12-08 15:52:44 -0800 | [diff] [blame] | 11 | import common |
Don Garrett | 5071346 | 2015-01-07 18:04:05 -0800 | [diff] [blame] | 12 | from autotest_lib.server import frontend |
Alex Miller | b0b2d25 | 2014-06-25 17:17:01 -0700 | [diff] [blame] | 13 | from autotest_lib.site_utils.lib import infra |
| 14 | |
# Path of the per-server deploy script that is executed on each remote server.
DEPLOY_SERVER_LOCAL = '/usr/local/autotest/site_utils/deploy_server_local.py'

# Number of worker threads used when updating a group of servers in parallel.
POOL_SIZE = 124

# Maps a server role to the index of the group it is pushed with. Groups are
# pushed in ascending index order; servers that land in the same group are
# pushed together.
PUSH_ORDER = {
    'database': 0,
    'database_slave': 0,
    'drone': 1,
    'shard': 1,
    'golo_proxy': 1,
    'afe': 2,
    'scheduler': 2,
    'host_scheduler': 2,
    'suite_scheduler': 2,
}
| 26 | |
Alex Miller | b0b2d25 | 2014-06-25 17:17:01 -0700 | [diff] [blame] | 27 | |
def discover_servers(afe, server_filter=None):
    """Discover the in-production servers to update.

    @param afe: Server to contact with RPC requests.
    @param server_filter: An iterable of server hostnames to restrict the
                          result to. None or empty means every server
                          returned by the AFE is considered.

    @returns: A list of a list of tuple of (server_name, server_status, roles).
              The list is sorted by the order to be updated. Servers in the
              same sublist can be pushed together.

    """
    # Normalize to a fresh set: avoids a shared mutable default argument and
    # lets callers pass any iterable (the set difference below needs a set).
    server_filter = set(server_filter or ())

    # Example server details....
    # {
    #     'hostname': 'server1',
    #     'status': 'backup',
    #     'roles': ['drone', 'scheduler'],
    #     'attributes': {'max_processes': 300}
    # }
    rpc = frontend.AFE(server=afe)
    servers = rpc.run('get_servers')

    # Do not update servers that need repair, filter the server list by the
    # given server_filter if needed, and do not update reserve, devserver or
    # crash_server (not YET supported).
    unsupported_roles = ('devserver', 'crash_server', 'reserve')
    servers = [s for s in servers
               if s['status'] != 'repair_required'
               and (not server_filter or s['hostname'] in server_filter)
               and not any(role in s['roles'] for role in unsupported_roles)]

    # One (initially empty) group per push order index.
    sorted_servers = [[] for _ in range(max(PUSH_ORDER.values()) + 1)]
    servers_with_unknown_order = []
    for server in servers:
        info = (server['hostname'], server['status'], server['roles'])
        orders = [PUSH_ORDER[r] for r in server['roles'] if r in PUSH_ORDER]
        if orders:
            # A server with several roles is pushed with its earliest group.
            sorted_servers[min(orders)].append(info)
        else:
            # None of the roles is indexed in PUSH_ORDER.
            servers_with_unknown_order.append(info)

    # Push all servers with unknown roles together.
    if servers_with_unknown_order:
        sorted_servers.append(servers_with_unknown_order)

    # Inject the servers passed in by user but not found in the (filtered)
    # server list. Sorted so the resulting group has a stable order.
    # NOTE(review): a requested host that was dropped above (repair_required
    # or an unsupported role) also ends up here as 'unknown' -- confirm that
    # this is intended.
    found_servers = set(s['hostname'] for s in servers)
    extra_servers = [(hostname, 'unknown', ['unknown'])
                     for hostname in sorted(server_filter - found_servers)]
    if extra_servers:
        sorted_servers.append(extra_servers)

    return sorted_servers
Alex Miller | b0b2d25 | 2014-06-25 17:17:01 -0700 | [diff] [blame] | 87 | |
J. Richard Barnette | f533b18 | 2014-09-04 18:24:42 -0700 | [diff] [blame] | 88 | |
def parse_arguments(args):
    """Parse command line arguments.

    Besides the regular flags, the trailing positional arguments are split at
    the first '--': everything before it is treated as a server name and
    everything after it is kept to be passed to deploy_server_local.py.

    @param args: The command line arguments to parse. (usually sys.argv[1:])

    @returns An argparse.Namespace populated with argument values. It also
             carries `servers` (list of server names) and `args` (list of
             arguments for the remote script).
    """
    usage_examples = ('Update all servers:\n'
                      ' deploy_server.py\n'
                      '\n'
                      'Update one server:\n'
                      ' deploy_server.py <server>\n'
                      '\n'
                      'Send arguments to remote deploy_server_local.py:\n'
                      ' deploy_server.py -- --dryrun\n'
                      '\n'
                      'See what arguments would be run on specified servers:\n'
                      ' deploy_server.py --dryrun <server_a> <server_b> --'
                      ' --skip-update\n')
    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description='Command to update an entire autotest installation.',
            epilog=usage_examples)

    parser.add_argument(
            '-v', '--verbose', action='store_true', dest='verbose',
            help='Log all deploy script output.')
    parser.add_argument(
            '--continue', action='store_true', dest='cont',
            help='Continue to the next server on failure.')
    parser.add_argument(
            '--afe', required=True,
            help='What is the main server for this installation? (cautotest).')
    parser.add_argument(
            '--update_push_servers', action='store_true',
            help='Indicate to update test_push servers.')
    parser.add_argument(
            '--force_update', action='store_true',
            help='Force to run update commands for afe, tko, build_externals')
    parser.add_argument(
            '--dryrun', action='store_true',
            help="Don't actually run remote commands.")
    parser.add_argument(
            'args', nargs=argparse.REMAINDER,
            help=('<server>, <server> ... -- <remote_arg>, <remote_arg> ...'))

    parsed = parser.parse_args(args)

    # Split the positional remainder at the first '--'.
    #
    # This:
    #   server_a, server_b -- --dryrun --skip-report
    #
    # Becomes:
    #   parsed.servers == ['server_a', 'server_b']
    #   parsed.args == ['--dryrun', '--skip-report']
    remainder = parsed.args
    if '--' in remainder:
        separator = remainder.index('--')
        parsed.servers = remainder[:separator]
        parsed.args = remainder[separator + 1:]
    else:
        # Without a '--' every positional argument is a server name.
        parsed.servers = remainder
        parsed.args = []

    return parsed
J. Richard Barnette | f533b18 | 2014-09-04 18:24:42 -0700 | [diff] [blame] | 151 | |
| 152 | |
def update_server(inputs):
    """Deploy for given server.

    The remote deploy command is attempted up to 5 times; the first attempt
    that succeeds ends the retries and marks the update as successful.

    @param inputs: Inputs for the update action, including:
                   server: Name of the server to update.
                   status: Status of the server.
                   options: Options for the update.

    @return: A tuple of (server, success, output), where:
             server: Name of the server to be updated.
             success: True if update succeeds, False otherwise. Always True
                      for a dry run.
             output: A string of the deploy_server_local script output
                     including any errors. For a dry run, the command that
                     would have been executed.

    """
    start = time.time()
    server = inputs['server']
    status = inputs['status']
    options = inputs['options']
    print('Updating server %s...' % server)

    # Servers in 'backup' status get an extra flag for the remote script.
    extra_args = ['--skip-service-status'] if status == 'backup' else []

    cmd = '%s %s' % (DEPLOY_SERVER_LOCAL,
                     ' '.join(options.args + extra_args))
    output = '%s: %s' % (server, cmd)
    success = True
    if not options.dryrun:
        max_attempts = 5
        for attempt in range(1, max_attempts + 1):
            print('[%s/%s] Try to update server %s' %
                  (attempt, max_attempts, server))
            try:
                output = infra.execute_command(server, cmd)
            except subprocess.CalledProcessError as e:
                print('%s: Command failed with error: %s' % (server, e))
                success = False
                output = e.output
            else:
                # Reset the flag: an earlier failed attempt must not mark a
                # server that eventually updated fine as failed.
                success = True
                break

    print('Time used to update server %s: %s' % (server, time.time()-start))
    return server, success, output
| 195 | |
| 196 | |
def update_in_parallel(servers, options):
    """Update a group of servers in parallel.

    @param servers: A list of tuple of (server_name, server_status, roles).
    @param options: Options for the push.

    @returns A list of servers that failed to update.
    """
    args = [{'server': server, 'status': status, 'options': options}
            for server, status, _ in servers]
    if not args:
        # Nothing to push; do not spin up a thread pool for an empty group.
        return []

    failed_servers = []
    # The update actions run in parallel. If any update failed, we should wait
    # for other running updates being finished. Abort in the middle of an
    # update may leave the server in a bad state.
    pool = multiprocessing.pool.ThreadPool(POOL_SIZE)
    try:
        for server, success, output in pool.imap_unordered(update_server,
                                                           args):
            if options.dryrun:
                print('Dry run, updating server %s is skipped.' % server)
            elif success:
                print('Successfully updated server %s.' % server)
                if options.verbose:
                    print(output)
                    print()
            else:
                error = ('Failed to update server %s.\nError: %s' %
                         (server, output))
                print(error)
                failed_servers.append(server)
    finally:
        # Release the worker threads. close() + join() (rather than
        # terminate()) lets any update that is still running finish.
        pool.close()
        pool.join()

    return failed_servers
Dan Shi | fb12d14 | 2015-06-09 23:30:11 -0700 | [diff] [blame] | 231 | |
def main(args):
    """Main routine that drives all the real work.

    Groups of servers are updated one after another. Once a group reports a
    failure, the remaining groups are skipped unless --continue was given.

    @param args: The command line arguments to parse. (usually sys.argv)

    @returns The system exit code: 0 if every attempted update succeeded,
             1 if any server failed to update.
    """
    options = parse_arguments(args[1:])

    print('Retrieving server status...')
    sorted_servers = discover_servers(options.afe, set(options.servers or []))

    # Display what we plan to update.
    print('Will update (in this order):')
    for i, servers in enumerate(sorted_servers, 1):
        print('%s Group %d (%d servers) %s' % ('='*30, i, len(servers), '='*30))
        for server, status, roles in servers:
            print('\t%-36s:\t%s\t%s' % (server, status, roles))
    print()

    failed = []
    skipped = []
    for servers in sorted_servers:
        if not failed or options.cont:
            failed += update_in_parallel(servers, options)
        else:
            # Keep only the hostnames so they can be joined into the retry
            # command line below.
            skipped.extend(server for server, _, _ in servers)

    if failed:
        print('Errors updating:')
        for server in failed:
            print(' %s' % server)
        print()
        print('To retry:')
        print(' %s <options> %s' % (args[0], ' '.join(failed + skipped)))
        return 1

    return 0
J. Richard Barnette | f533b18 | 2014-09-04 18:24:42 -0700 | [diff] [blame] | 269 | |
| 270 | |
# Script entry point: run main() with the full argv (program name included)
# and hand its return value to sys.exit().
if __name__ == '__main__':
    sys.exit(main(sys.argv))