blob: e5fe5ea8a971c4931de4349217305823ab0a369a [file] [log] [blame]
Alex Millerb0b2d252014-06-25 17:17:01 -07001#!/usr/bin/python
2
Don Garrett40036362014-12-08 15:52:44 -08003from __future__ import print_function
4
5import argparse
Dan Shifb12d142015-06-09 23:30:11 -07006import multiprocessing.pool
J. Richard Barnette868cf642014-07-21 16:34:38 -07007import subprocess
8import sys
Dan Shi94c310d2016-03-18 11:27:38 -07009import time
Alex Millerb0b2d252014-06-25 17:17:01 -070010
Don Garrett40036362014-12-08 15:52:44 -080011import common
Don Garrett50713462015-01-07 18:04:05 -080012from autotest_lib.server import frontend
Alex Millerb0b2d252014-06-25 17:17:01 -070013from autotest_lib.site_utils.lib import infra
14
# Command that update_server() runs on each server (via
# infra.execute_command) to perform the actual deployment.
DEPLOY_SERVER_LOCAL = '/usr/local/autotest/site_utils/deploy_server_local.py'

# Number of worker threads used by update_in_parallel().
POOL_SIZE = 124

# Maps a server role to the index of the group it is pushed with.
# discover_servers() places each server in the group of its lowest-numbered
# role; groups are returned in ascending order and servers sharing a group
# are updated together.
PUSH_ORDER = {
    'database': 0,
    'database_slave': 0,
    'drone': 1,
    'shard': 1,
    'golo_proxy': 1,
    'afe': 2,
    'scheduler': 2,
    'host_scheduler': 2,
    'suite_scheduler': 2,
}
26
Alex Millerb0b2d252014-06-25 17:17:01 -070027
def discover_servers(afe, server_filter=None):
    """Discover the in-production servers to update.

    @param afe: Server to contact with RPC requests.
    @param server_filter: An iterable of server hostnames to restrict the
                          update to. None or empty means no restriction.

    @returns: A list of a list of tuple of (server_name, server_status, roles).
              The list is sorted by the order to be updated. Servers in the
              same sublist can be pushed together. One sublist exists for
              every PUSH_ORDER index (possibly empty), optionally followed
              by a sublist of servers whose roles are not in PUSH_ORDER and
              a sublist of requested servers missing from the RPC result.

    """
    # Copy into a fresh set: avoids a shared mutable default argument and
    # lets callers pass any iterable (list, tuple, set).
    server_filter = set(server_filter or ())

    # Example server details....
    # {
    #     'hostname': 'server1',
    #     'status': 'backup',
    #     'roles': ['drone', 'scheduler'],
    #     'attributes': {'max_processes': 300}
    # }
    rpc = frontend.AFE(server=afe)
    servers = rpc.run('get_servers')

    # Do not update servers that need repair, and filter the server list by
    # given server_filter if needed.
    servers = [s for s in servers
               if (s['status'] != 'repair_required' and
                   (not server_filter or s['hostname'] in server_filter))]

    # Do not update reserve, devserver or crash_server (not YET supported).
    unsupported_roles = ('devserver', 'crash_server', 'reserve')
    servers = [s for s in servers
               if not any(role in s['roles'] for role in unsupported_roles)]

    # One (initially empty) group per push-order index.
    sorted_servers = [[] for _ in range(max(PUSH_ORDER.values()) + 1)]
    servers_with_unknown_order = []
    for server in servers:
        info = (server['hostname'], server['status'], server['roles'])
        orders = [PUSH_ORDER[r] for r in server['roles'] if r in PUSH_ORDER]
        if orders:
            # A server with several roles is pushed with its earliest group.
            sorted_servers[min(orders)].append(info)
        else:
            # None of the roles is indexed in PUSH_ORDER.
            servers_with_unknown_order.append(info)

    # Push all servers with unknown roles together.
    if servers_with_unknown_order:
        sorted_servers.append(servers_with_unknown_order)

    # Inject the servers passed in by user but not found in server database.
    # Servers dropped by the filters above also end up here when they were
    # requested explicitly. Sorted so the plan is printed deterministically.
    found_servers = set(s['hostname'] for s in servers)
    extra_servers = [(hostname, 'unknown', ['unknown'])
                     for hostname in sorted(server_filter - found_servers)]
    if extra_servers:
        sorted_servers.append(extra_servers)

    return sorted_servers
Alex Millerb0b2d252014-06-25 17:17:01 -070087
J. Richard Barnettef533b182014-09-04 18:24:42 -070088
def parse_arguments(args):
    """Parse command line arguments.

    Besides the regular options, the trailing positional arguments are split
    at the first '--': what comes before it is stored as `servers`, what
    comes after it is stored as `args` (arguments forwarded to
    deploy_server_local.py).

    @param args: The command line arguments to parse. (usually sys.argv[1:])

    @returns An argparse.Namespace populated with argument values.
    """
    usage_examples = ('Update all servers:\n'
                      ' deploy_server.py\n'
                      '\n'
                      'Update one server:\n'
                      ' deploy_server.py <server>\n'
                      '\n'
                      'Send arguments to remote deploy_server_local.py:\n'
                      ' deploy_server.py -- --dryrun\n'
                      '\n'
                      'See what arguments would be run on specified servers:\n'
                      ' deploy_server.py --dryrun <server_a> <server_b> --'
                      ' --skip-update\n')
    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description='Command to update an entire autotest installation.',
            epilog=usage_examples)

    parser.add_argument(
            '-v', '--verbose', action='store_true', dest='verbose',
            help='Log all deploy script output.')
    parser.add_argument(
            '--continue', action='store_true', dest='cont',
            help='Continue to the next server on failure.')
    parser.add_argument(
            '--afe', required=True,
            help='What is the main server for this installation? (cautotest).')
    parser.add_argument(
            '--update_push_servers', action='store_true',
            help='Indicate to update test_push servers.')
    parser.add_argument(
            '--force_update', action='store_true',
            help='Force to run update commands for afe, tko, build_externals')
    parser.add_argument(
            '--dryrun', action='store_true',
            help="Don't actually run remote commands.")
    parser.add_argument(
            'args', nargs=argparse.REMAINDER,
            help=('<server>, <server> ... -- <remote_arg>, <remote_arg> ...'))

    results = parser.parse_args(args)

    # The remainder list holds both server names and pass-through arguments.
    #
    # This:
    #   server_a, server_b -- --dryrun --skip-report
    #
    # Becomes:
    #   results.servers == ['server_a', 'server_b']
    #   results.args == ['--dryrun', '--skip-report']
    remainder = results.args
    if '--' in remainder:
        separator = remainder.index('--')
        results.servers = remainder[:separator]
        results.args = remainder[separator + 1:]
    else:
        # Without a separator, every remaining argument is a server name.
        results.servers = remainder
        results.args = []

    return results
J. Richard Barnettef533b182014-09-04 18:24:42 -0700151
152
def update_server(inputs):
    """Deploy for given server.

    Runs DEPLOY_SERVER_LOCAL on the server through infra.execute_command,
    retrying up to 5 times when the command fails. Nothing is executed when
    options.dryrun is set.

    @param inputs: Inputs for the update action, including:
                   server: Name of the server to update.
                   status: Status of the server.
                   options: Options for the update.

    @return: A tuple of (server, success, output), where:
             server: Name of the server to be updated.
             success: True if update succeeds, False otherwise.
             output: A string of the deploy_server_local script output
                     including any errors. For a dry run this is the command
                     that would have been executed.

    """
    max_attempts = 5
    start = time.time()
    server = inputs['server']
    status = inputs['status']
    options = inputs['options']
    print('Updating server %s...' % server)

    # Servers in 'backup' status get one additional flag passed to the
    # remote deploy script.
    extra_args = ['--skip-service-status'] if status == 'backup' else []

    cmd = ('%s %s' %
           (DEPLOY_SERVER_LOCAL, ' '.join(options.args + extra_args)))
    output = '%s: %s' % (server, cmd)
    success = True
    if not options.dryrun:
        for attempt in range(1, max_attempts + 1):
            try:
                print('[%s/%s] Try to update server %s' %
                      (attempt, max_attempts, server))
                output = infra.execute_command(server, cmd)
            except subprocess.CalledProcessError as e:
                print('%s: Command failed with error: %s' % (server, e))
                success = False
                output = e.output
            else:
                # Reset the flag: an earlier failed attempt must not make a
                # later successful retry look like a failure.
                success = True
                break

    print('Time used to update server %s: %s' % (server, time.time()-start))
    return server, success, output
195
196
def update_in_parallel(servers, options):
    """Update a group of servers in parallel.

    @param servers: A list of tuple of (server_name, server_status, roles).
    @param options: Options for the push.

    @returns A list of servers that failed to update.
    """
    args = [{'server': server, 'status': status, 'options': options}
            for server, status, _ in servers]
    if not args:
        # Nothing to do; do not spin up a thread pool for an empty group.
        return []

    failed_servers = []
    pool = multiprocessing.pool.ThreadPool(POOL_SIZE)
    try:
        # The update actions run in parallel. If any update failed, we should
        # wait for other running updates being finished. Abort in the middle
        # of an update may leave the server in a bad state.
        results = pool.imap_unordered(update_server, args)
        for server, success, output in results:
            if options.dryrun:
                print('Dry run, updating server %s is skipped.' % server)
            elif success:
                print('Successfully updated server %s.' % server)
                if options.verbose:
                    print(output)
                    print()
            else:
                error = ('Failed to update server %s.\nError: %s' %
                         (server, output))
                print(error)
                failed_servers.append(server)
    finally:
        # Release the worker threads; without this every group would leave
        # POOL_SIZE idle threads behind. join() also waits for any update
        # still in flight if the loop above is interrupted.
        pool.close()
        pool.join()

    return failed_servers
Dan Shifb12d142015-06-09 23:30:11 -0700231
def main(args):
    """Main routine that drives all the real work.

    Discovers the servers, prints the update plan, then updates the server
    groups one after another. Once a group has failures, the remaining
    groups are skipped unless --continue was given.

    @param args: The command line arguments to parse. (usually sys.argv)

    @returns The system exit code: 0 if every attempted update succeeded,
             1 if any server failed to update.
    """
    options = parse_arguments(args[1:])

    print('Retrieving server status...')
    sorted_servers = discover_servers(options.afe, set(options.servers or []))

    # Display what we plan to update.
    print('Will update (in this order):')
    for i, servers in enumerate(sorted_servers, 1):
        print('%s Group %d (%d servers) %s' % ('='*30, i, len(servers), '='*30))
        for server, status, roles in servers:
            print('\t%-36s:\t%s\t%s' % (server, status, roles))
    print()

    failed = []
    skipped = []
    for servers in sorted_servers:
        if not failed or options.cont:
            failed += update_in_parallel(servers, options)
        else:
            # Keep only the hostname (first tuple element) so the names can
            # be joined into the retry command line below.
            skipped.extend(server for server, _, _ in servers)

    if failed:
        print('Errors updating:')
        for server in failed:
            print(' %s' % server)
        print()
        print('To retry:')
        print(' %s <options> %s' % (args[0], ' '.join(failed + skipped)))
        # Report the failure to the caller through the exit code.
        return 1

    return 0
J. Richard Barnettef533b182014-09-04 18:24:42 -0700269
270
if __name__ == '__main__':
    # Run the deployment and hand main()'s return value to the shell as the
    # process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)