blob: d460b2a79ed041bd9308acc01139a18496c6bfd4 [file] [log] [blame]
Alex Millerb0b2d252014-06-25 17:17:01 -07001#!/usr/bin/python
2
Don Garrett40036362014-12-08 15:52:44 -08003from __future__ import print_function
4
5import argparse
Shuqian Zhao9febd452017-01-31 15:36:40 -08006import logging
Shuqian Zhaofad50672017-02-02 16:46:03 -08007import multiprocessing
Shuqian Zhao9febd452017-01-31 15:36:40 -08008import os
J. Richard Barnette868cf642014-07-21 16:34:38 -07009import subprocess
10import sys
Dan Shi94c310d2016-03-18 11:27:38 -070011import time
Alex Millerb0b2d252014-06-25 17:17:01 -070012
Don Garrett40036362014-12-08 15:52:44 -080013import common
Don Garrett50713462015-01-07 18:04:05 -080014from autotest_lib.server import frontend
Alex Millerb0b2d252014-06-25 17:17:01 -070015from autotest_lib.site_utils.lib import infra
16
# Path of the deploy script that is run on each server being updated.
DEPLOY_SERVER_LOCAL = '/usr/local/autotest/site_utils/deploy_server_local.py'

# Number of worker threads used to update one group of servers in parallel.
POOL_SIZE = 124

# Maps a server role to its push group. Groups are pushed in ascending order;
# servers whose lowest-numbered role falls in the same group are pushed
# together.
PUSH_ORDER = {
    'database': 0,
    'database_slave': 0,
    'drone': 1,
    'shard': 1,
    'golo_proxy': 1,
    'afe': 2,
    'scheduler': 2,
    'host_scheduler': 2,
    'suite_scheduler': 2,
}
28
Alex Millerb0b2d252014-06-25 17:17:01 -070029
def discover_servers(afe, server_filter=None):
    """Discover the in-production servers to update.

    @param afe: Server to contact with RPC requests.
    @param server_filter: An iterable of server hostnames to restrict the
                          result to. None or empty means "no filtering".

    @returns: A list of lists of (server_name, server_status, roles) tuples.
              The outer list is ordered by push group (see PUSH_ORDER);
              servers in the same sublist can be pushed together. Servers
              with no role listed in PUSH_ORDER come next in one extra
              group, followed by a group of servers that were requested in
              server_filter but not returned by the RPC (reported with
              status 'unknown' and roles ['unknown']).

    """
    # Copy into a fresh set: avoids a shared mutable default argument and
    # lets callers pass any iterable.
    server_filter = set(server_filter or ())

    # Example server details....
    # {
    #     'hostname': 'server1',
    #     'status': 'backup',
    #     'roles': ['drone', 'scheduler'],
    #     'attributes': {'max_processes': 300}
    # }
    rpc = frontend.AFE(server=afe)
    servers = rpc.run('get_servers')

    # Do not update servers that need repair, servers outside the requested
    # filter, or reserve/devserver/crash_server hosts (not YET supported).
    unsupported_roles = ('devserver', 'crash_server', 'reserve')
    servers = [
        s for s in servers
        if s['status'] != 'repair_required'
        and (not server_filter or s['hostname'] in server_filter)
        and not any(role in s['roles'] for role in unsupported_roles)]

    # One (possibly empty) sublist per push group, indexed by group number.
    sorted_servers = [[] for _ in range(max(PUSH_ORDER.values()) + 1)]
    servers_with_unknown_order = []
    for server in servers:
        info = (server['hostname'], server['status'], server['roles'])
        orders = [PUSH_ORDER[r] for r in server['roles'] if r in PUSH_ORDER]
        if orders:
            # A server with several roles is pushed with its earliest group.
            sorted_servers[min(orders)].append(info)
        else:
            # None of the roles is indexed in PUSH_ORDER.
            servers_with_unknown_order.append(info)

    # Push all servers with unknown roles together.
    if servers_with_unknown_order:
        sorted_servers.append(servers_with_unknown_order)

    # Inject the servers passed in by user but not found in server database.
    # Sorted so that the push plan is deterministic between runs.
    found_servers = set(s['hostname'] for s in servers)
    extra_servers = [(hostname, 'unknown', ['unknown'])
                     for hostname in sorted(server_filter - found_servers)]
    if extra_servers:
        sorted_servers.append(extra_servers)

    return sorted_servers
Alex Millerb0b2d252014-06-25 17:17:01 -070089
J. Richard Barnettef533b182014-09-04 18:24:42 -070090
def parse_arguments(args):
    """Parse command line arguments.

    @param args: The command line arguments to parse. (usually sys.argv[1:])

    @returns An argparse.Namespace populated with argument values. Besides
             the declared options it carries two derived attributes:
             `servers` (positional words before a literal '--') and `args`
             (words after '--', forwarded to deploy_server_local.py).
    """
    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description='Command to update an entire autotest installation.',
            epilog=('Update all servers:\n'
                    ' deploy_server.py\n'
                    '\n'
                    'Update one server:\n'
                    ' deploy_server.py <server>\n'
                    '\n'
                    'Send arguments to remote deploy_server_local.py:\n'
                    ' deploy_server.py -- --dryrun\n'
                    '\n'
                    'See what arguments would be run on specified servers:\n'
                    ' deploy_server.py --dryrun <server_a> <server_b> --'
                    ' --skip-update\n'))

    parser.add_argument('-v', '--verbose', action='store_true', dest='verbose',
            help='Log all deploy script output.')
    parser.add_argument('--continue', action='store_true', dest='cont',
            help='Continue to the next server on failure.')
    parser.add_argument('--afe', required=True,
            help='What is the main server for this installation? (cautotest).')
    parser.add_argument('--update_push_servers', action='store_true',
            help='Indicate to update test_push servers.')
    parser.add_argument('--force_update', action='store_true',
            help='Force to run update commands for afe, tko, build_externals')
    parser.add_argument('--dryrun', action='store_true',
            help='Don\'t actually run remote commands.')
    parser.add_argument('--logfile', action='store',
            default='/tmp/deployment.log',
            help='Path to the file to save the deployment log to. Default is '
                 '/tmp/deployment.log')
    parser.add_argument('args', nargs=argparse.REMAINDER,
            help=('<server>, <server> ... -- <remote_arg>, <remote_arg> ...'))

    results = parser.parse_args(args)

    # We take the args list and further split it down. Everything before --
    # is a server name, and everything after it is an argument to pass along
    # to deploy_server_local.py.
    #
    # This:
    #   server_a server_b -- --dryrun --skip-report
    #
    # Becomes:
    #   results.servers == ['server_a', 'server_b']
    #   results.args == ['--dryrun', '--skip-report']
    remainder = results.args
    if '--' in remainder:
        # Split at the first separator; the separator itself is dropped.
        separator_index = remainder.index('--')
        results.servers = remainder[:separator_index]
        results.args = remainder[separator_index + 1:]
    else:
        # If -- isn't present, they are all servers.
        results.servers = remainder
        results.args = []

    return results
J. Richard Barnettef533b182014-09-04 18:24:42 -0700157
158
def update_server(inputs):
    """Deploy for given server.

    @param inputs: Inputs for the update action, a dict including:
                   server: Name of the server to update.
                   status: Status of the server.
                   finished_servers: Shared list; the server name is appended
                                     to it once this call is done with the
                                     server, whatever the outcome.
                   options: Options for the update.

    @return: A tuple of (server, success, output), where:
             server: Name of the server to be updated.
             success: True if update succeeds, False otherwise.
             output: A string of the deploy_server_local script output
                     including any errors. In a dry run it is the command
                     that would have been executed.

    """
    max_attempts = 5
    start = time.time()
    server = inputs['server']
    status = inputs['status']
    # Shared list to record the finished server.
    finished_servers = inputs['finished_servers']
    options = inputs['options']
    print('Updating server %s...' % server)
    # For servers in 'backup' status, ask the remote script to skip its
    # service status handling.
    extra_args = ['--skip-service-status'] if status == 'backup' else []

    cmd = ('%s %s' %
           (DEPLOY_SERVER_LOCAL, ' '.join(options.args + extra_args)))
    output = '%s: %s' % (server, cmd)
    success = True
    try:
        if not options.dryrun:
            for attempt in range(1, max_attempts + 1):
                try:
                    print('[%s/%s] Try to update server %s' %
                          (attempt, max_attempts, server))
                    output = infra.execute_command(server, cmd)
                    # Reset the flag: an earlier failed attempt must not
                    # mark a server that eventually updated as failed.
                    success = True
                    break
                except subprocess.CalledProcessError as e:
                    print('%s: Command failed with error: %s' % (server, e))
                    success = False
                    # e.output can be None; keep `output` a string so
                    # callers can safely call string methods on it.
                    output = e.output or str(e)
    finally:
        # Record completion even on failure so that progress reporting does
        # not list a server that has given up as still running.
        finished_servers.append(server)

    print('Time used to update server %s: %s' % (server, time.time()-start))
    return server, success, output
204
205
def update_in_parallel(servers, options):
    """Update a group of servers in parallel.

    @param servers: A list of tuple of (server_name, server_status, roles).
    @param options: Options for the push.

    @returns A list of servers that failed to update.
    """
    # Import the submodule explicitly; `import multiprocessing` alone does not
    # guarantee that `multiprocessing.pool` has been loaded.
    from multiprocessing.pool import ThreadPool

    # Create a list to record all the finished servers.
    manager = multiprocessing.Manager()
    try:
        finished_servers = manager.list()

        args = [{'server': server,
                 'status': status,
                 'finished_servers': finished_servers,
                 'options': options}
                for server, status, _ in servers]
        # The update actions run in parallel. If any update failed, we should
        # wait for other running updates being finished. Abort in the middle
        # of an update may leave the server in a bad state.
        pool = ThreadPool(POOL_SIZE)
        results = pool.map_async(update_server, args)
        pool.close()

        # Track the updating progress for current group of servers.
        server_names = set(s[0] for s in servers)
        while not results.ready():
            incomplete_servers = server_names - set(finished_servers)
            print('Not finished yet. %d servers in this group. '
                  '%d servers are still running:\n%s\n' %
                  (len(servers), len(incomplete_servers), incomplete_servers))
            # Check the progress every 1 mins
            results.wait(60)

        outcomes = results.get()
        # All tasks are done; reap the worker threads.
        pool.join()
    finally:
        # Stop the manager's helper process instead of leaking it.
        manager.shutdown()

    # After update finished, parse the result.
    failed_servers = []
    log_entries = []
    for server, success, output in outcomes:
        if options.dryrun:
            print('Dry run, updating server %s is skipped.' % server)
            continue

        if success:
            msg = ('Successfully updated server %s.\n' % server)
            if options.verbose:
                print(output)
                print()
        else:
            msg = ('Failed to update server %s.\nError: %s' %
                   (server, (output or '').strip()))
            print(msg)
            failed_servers.append(server)
            # Terminate the record so the next log entry starts on its own
            # line.
            msg += '\n'
        log_entries.append(msg)

    # Write the result into logfile.
    if log_entries:
        with open(options.logfile, 'a') as f:
            f.writelines(log_entries)

    return failed_servers
Dan Shifb12d142015-06-09 23:30:11 -0700263
def main(args):
    """Main routine that drives all the real work.

    @param args: The command line arguments to parse. (usually sys.argv)

    @returns The system exit code: 0 when every attempted update succeeded,
             1 when at least one server failed to update.
    """
    options = parse_arguments(args[1:])
    # Remove all the handlers from the root logger to get rid of the handlers
    # introduced by the import packages.
    logging.getLogger().handlers = []
    logging.basicConfig(level=logging.DEBUG
                        if options.verbose else logging.INFO)

    print('Retrieving server status...')
    sorted_servers = discover_servers(options.afe, set(options.servers or []))

    # Display what we plan to update.
    print('Will update (in this order):')
    for group_number, servers in enumerate(sorted_servers, 1):
        print('%s Group %d (%d servers) %s' %
              ('='*30, group_number, len(servers), '='*30))
        for server, status, roles in servers:
            print('\t%-36s:\t%s\t%s' % (server, status, roles))
    print()

    # Start from an empty log; results are appended to it group by group.
    if os.path.exists(options.logfile):
        os.remove(options.logfile)
    print('Start updating, push logs of every server will be saved '
          'at %s' % options.logfile)
    failed = []
    skipped = []
    for servers in sorted_servers:
        # Once a group has failed, later groups are only attempted when
        # --continue was given; otherwise they are recorded as skipped.
        if not failed or options.cont:
            failed += update_in_parallel(servers, options)
        else:
            skipped.extend(s[0] for s in servers)  # Only include server name.

    if failed:
        print('Errors updating:')
        for server in failed:
            print(' %s' % server)
        print()
        print('To retry:')
        print(' %s <options> %s' %
              (str(args[0]), str(' '.join(failed + skipped))))
        # Exit with error.
        return 1

    return 0
J. Richard Barnettef533b182014-09-04 18:24:42 -0700313
314
# Run the deployment only when executed as a script; the return value of
# main() becomes the process exit code.
if __name__ == '__main__':
    sys.exit(main(sys.argv))