blob: a77a58f238c420bd16f72459c28a104c7ba424a3 [file] [log] [blame]
Alex Millerb0b2d252014-06-25 17:17:01 -07001#!/usr/bin/python
2
Don Garrett40036362014-12-08 15:52:44 -08003from __future__ import print_function
4
5import argparse
Dan Shifb12d142015-06-09 23:30:11 -07006import multiprocessing.pool
J. Richard Barnette868cf642014-07-21 16:34:38 -07007import subprocess
8import sys
Dan Shi94c310d2016-03-18 11:27:38 -07009import time
Alex Millerb0b2d252014-06-25 17:17:01 -070010
Don Garrett40036362014-12-08 15:52:44 -080011import common
Don Garrett50713462015-01-07 18:04:05 -080012from autotest_lib.server import frontend
Alex Millerb0b2d252014-06-25 17:17:01 -070013from autotest_lib.site_utils.lib import infra
14
# Script that update_server() runs on each remote server via
# infra.execute_command().
DEPLOY_SERVER_LOCAL = '/usr/local/autotest/site_utils/deploy_server_local.py'

# Number of worker threads in the pool used by update_in_parallel().
POOL_SIZE = 124

# Push order keyed by server role. discover_servers() places each server in
# the group of the smallest order among its roles; groups are pushed in
# ascending order and servers within one group are pushed together.
PUSH_ORDER = {
    # Group 0.
    'database': 0,
    'database_slave': 0,
    # Group 1.
    'drone': 1,
    'shard': 1,
    'golo_proxy': 1,
    # Group 2.
    'afe': 2,
    'scheduler': 2,
    'host_scheduler': 2,
    'suite_scheduler': 2,
}
26
Alex Millerb0b2d252014-06-25 17:17:01 -070027
def discover_servers(afe, server_filter=None):
    """Discover the in-production servers to update.

    @param afe: Server to contact with RPC requests.
    @param server_filter: An iterable of server hostnames to restrict the
                          update to. None or empty means all servers.

    @returns: A list of a list of tuple of (server_name, server_status, roles).
              The list is sorted by the order to be updated. Servers in the
              same sublist can be pushed together.

    """
    # Normalize to a set: the code below relies on membership tests and on
    # set difference, and a None default avoids a shared mutable default.
    server_filter = set(server_filter or ())

    # Example server details....
    # {
    #     'hostname': 'server1',
    #     'status': 'backup',
    #     'roles': ['drone', 'scheduler'],
    #     'attributes': {'max_processes': 300}
    # }
    rpc = frontend.AFE(server=afe)
    servers = rpc.run('get_servers')

    # Do not update servers that need repair, and filter the server list by
    # given server_filter if needed.
    servers = [s for s in servers
               if (s['status'] != 'repair_required' and
                   (not server_filter or s['hostname'] in server_filter))]

    # Do not update reserve, devserver or crash_server (not YET supported).
    servers = [s for s in servers if 'devserver' not in s['roles'] and
               'crash_server' not in s['roles'] and
               'reserve' not in s['roles']]

    # One (possibly empty) group per push order value, indexed by that value.
    sorted_servers = [[] for _ in range(max(PUSH_ORDER.values()) + 1)]
    servers_with_unknown_order = []
    for server in servers:
        info = (server['hostname'], server['status'], server['roles'])
        orders = [PUSH_ORDER[r] for r in server['roles'] if r in PUSH_ORDER]
        if orders:
            # A server with several roles is pushed with its earliest group.
            sorted_servers[min(orders)].append(info)
        else:
            # None of the roles is indexed in PUSH_ORDER.
            servers_with_unknown_order.append(info)

    # Push all servers with unknown roles together.
    if servers_with_unknown_order:
        sorted_servers.append(servers_with_unknown_order)

    # Inject the servers passed in by user but not found in server database.
    # NOTE: found_servers is computed after the filtering above, so a server
    # named by the user but dropped by those filters is injected here too.
    found_servers = set(s['hostname'] for s in servers)
    extra_servers = [(hostname, 'unknown', ['unknown'])
                     for hostname in sorted(server_filter - found_servers)]
    if extra_servers:
        sorted_servers.append(extra_servers)

    return sorted_servers
Alex Millerb0b2d252014-06-25 17:17:01 -070087
J. Richard Barnettef533b182014-09-04 18:24:42 -070088
def parse_arguments(args):
    """Parse command line arguments.

    Besides the flags declared below, the positional remainder is split at
    the first '--': names before it are servers to update, everything after
    it is passed along to deploy_server_local.py.

    @param args: The command line arguments to parse. (usually sys.argv[1:])

    @returns An argparse.Namespace populated with argument values, with
             `servers` holding the server names and `args` holding the
             arguments for the remote script.
    """
    usage_examples = (
            'Update all servers:\n'
            ' deploy_server.py\n'
            '\n'
            'Update one server:\n'
            ' deploy_server.py <server>\n'
            '\n'
            'Send arguments to remote deploy_server_local.py:\n'
            ' deploy_server.py -- --dryrun\n'
            '\n'
            'See what arguments would be run on specified servers:\n'
            ' deploy_server.py --dryrun <server_a> <server_b> --'
            ' --skip-update\n')

    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description='Command to update an entire autotest installation.',
            epilog=usage_examples)

    parser.add_argument(
            '-v', '--verbose', action='store_true', dest='verbose',
            help='Log all deploy script output.')
    parser.add_argument(
            '--continue', action='store_true', dest='cont',
            help='Continue to the next server on failure.')
    parser.add_argument(
            '--afe', required=True,
            help='What is the main server for this installation? (cautotest).')
    parser.add_argument(
            '--update_push_servers', action='store_true',
            help='Indicate to update test_push servers.')
    parser.add_argument(
            '--dryrun', action='store_true',
            help='Don\'t actually run remote commands.')
    parser.add_argument(
            'args', nargs=argparse.REMAINDER,
            help=('<server>, <server> ... -- <remote_arg>, <remote_arg> ...'))

    results = parser.parse_args(args)

    # The remainder is split at the first '--'. For example:
    #   server_a server_b -- --dryrun --skip-report
    # becomes:
    #   results.servers == ['server_a', 'server_b']
    #   results.args == ['--dryrun', '--skip-report']
    remainder = results.args
    if '--' in remainder:
        separator = remainder.index('--')
        results.servers = remainder[:separator]
        results.args = remainder[separator + 1:]
    else:
        # Without a '--' every remaining argument is a server name.
        results.servers = remainder
        results.args = []

    return results
J. Richard Barnettef533b182014-09-04 18:24:42 -0700149
150
def update_server(inputs):
    """Deploy for given server.

    The deploy command is attempted up to five times; the update counts as
    successful as soon as one attempt succeeds.

    @param inputs: Inputs for the update action, a dict including:
                   server: Name of the server to update.
                   status: Status of the server.
                   options: Options for the update; its `args` list is
                            appended to the remote command and its `dryrun`
                            flag suppresses the remote execution.

    @return: A tuple of (server, success, output), where:
             server: Name of the server to be updated.
             success: True if update succeeds, False otherwise.
             output: A string of the deploy_server_local script output
                     including any errors. In a dry run this is the
                     command that would have been run.

    """
    start = time.time()
    server = inputs['server']
    status = inputs['status']
    options = inputs['options']
    print('Updating server %s...' % server)
    if status == 'backup':
        extra_args = ['--skip-service-status']
    else:
        extra_args = []

    cmd = ('%s %s' %
           (DEPLOY_SERVER_LOCAL, ' '.join(options.args + extra_args)))
    output = '%s: %s' % (server, cmd)
    success = True
    if not options.dryrun:
        max_attempts = 5
        for attempt in range(1, max_attempts + 1):
            try:
                print('[%s/%s] Try to update server %s' %
                      (attempt, max_attempts, server))
                output = infra.execute_command(server, cmd)
                # Reset the flag: an earlier failed attempt must not mark a
                # later successful retry as a failure.
                success = True
                break
            except subprocess.CalledProcessError as e:
                print('%s: Command failed with error: %s' % (server, e))
                success = False
                output = e.output

    print('Time used to update server %s: %s' % (server, time.time()-start))
    return server, success, output
193
194
def update_in_parallel(servers, options):
    """Update a group of servers in parallel.

    @param servers: A list of tuple of (server_name, server_status, roles).
    @param options: Options for the push; `dryrun` and `verbose` control
                    what is reported for each server.

    @returns A list of names of the servers that failed to update.
    """
    args = [{'server': server, 'status': status, 'options': options}
            for server, status, _ in servers]

    # The update actions run in parallel. If any update failed, we should wait
    # for other running updates being finished. Abort in the middle of an update
    # may leave the server in a bad state.
    pool = multiprocessing.pool.ThreadPool(POOL_SIZE)
    failed_servers = []
    try:
        results = pool.imap_unordered(update_server, args)
        for server, success, output in results:
            if options.dryrun:
                print('Dry run, updating server %s is skipped.' % server)
            elif success:
                print('Successfully updated server %s.' % server)
                if options.verbose:
                    print(output)
                    print()
            else:
                error = ('Failed to update server %s.\nError: %s' %
                         (server, output))
                print(error)
                failed_servers.append(server)
    finally:
        # Release the worker threads. close() + join() (rather than
        # terminate()) lets any update that is still running finish.
        pool.close()
        pool.join()

    return failed_servers
Dan Shifb12d142015-06-09 23:30:11 -0700229
def main(args):
    """Main routine that drives all the real work.

    Server groups are updated one after another. Once a group has a failure,
    the remaining groups are skipped unless --continue was given.

    @param args: The command line arguments to parse. (usually sys.argv)

    @returns The system exit code: 1 if any server failed to update,
             0 otherwise.
    """
    options = parse_arguments(args[1:])

    print('Retrieving server status...')
    sorted_servers = discover_servers(options.afe, set(options.servers or []))

    # Display what we plan to update.
    print('Will update (in this order):')
    for i, servers in enumerate(sorted_servers, 1):
        print('%s Group %d (%d servers) %s' % ('='*30, i, len(servers), '='*30))
        for server, status, roles in servers:
            print('\t%-36s:\t%s\t%s' % (server, status, roles))
    print()

    failed = []
    skipped = []
    for servers in sorted_servers:
        if not failed or options.cont:
            failed += update_in_parallel(servers, options)
        else:
            # Keep only the server names so they can be joined into the
            # retry command line below.
            skipped.extend(name for name, _, _ in servers)

    if failed:
        print('Errors updating:')
        for server in failed:
            print(' %s' % server)
        print()
        print('To retry:')
        print(' %s <options> %s' % (args[0], ' '.join(failed + skipped)))
        # Exit with error.
        return 1

    return 0
J. Richard Barnettef533b182014-09-04 18:24:42 -0700267
268
if __name__ == '__main__':
    # Script entry point: whatever main() returns becomes the exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)