blob: e83e908f9eb2bdafc55cccb58a5a05575f1244b7 [file] [log] [blame]
Alex Millerb0b2d252014-06-25 17:17:01 -07001#!/usr/bin/python
2
Don Garrett40036362014-12-08 15:52:44 -08003from __future__ import print_function
4
5import argparse
Shuqian Zhao9febd452017-01-31 15:36:40 -08006import logging
Shuqian Zhaofad50672017-02-02 16:46:03 -08007import multiprocessing
J. Richard Barnette868cf642014-07-21 16:34:38 -07008import subprocess
9import sys
Alex Millerb0b2d252014-06-25 17:17:01 -070010
Don Garrett40036362014-12-08 15:52:44 -080011import common
Don Garrett50713462015-01-07 18:04:05 -080012from autotest_lib.server import frontend
Alex Millerb0b2d252014-06-25 17:17:01 -070013from autotest_lib.site_utils.lib import infra
14
# Path of the deploy_server_local script that is executed on each server.
DEPLOY_SERVER_LOCAL = '/usr/local/autotest/site_utils/deploy_server_local.py'
# Number of worker threads in the pool used to update servers in parallel.
POOL_SIZE = 124
Dan Shifb12d142015-06-09 23:30:11 -070017
Alex Millerb0b2d252014-06-25 17:17:01 -070018
def _filter_servers(servers):
    """Yield only the servers that are eligible for deployment.

    A server is skipped when its status is 'repair_required' or 'backup', or
    when it holds at least one of the roles that are never pushed to.

    @param servers: Iterable of server detail dicts; each must provide the
                    'status' key and, unless skipped by status, the 'roles'
                    key.

    @returns: A generator over the eligible server dicts, in input order.
    """
    skipped_statuses = ('repair_required', 'backup')
    skipped_roles = frozenset(['devserver', 'crash_server', 'reserve'])
    for server in servers:
        if server['status'] in skipped_statuses:
            continue
        # Any overlap with the non-push roles disqualifies the server.
        if not skipped_roles.isdisjoint(server['roles']):
            continue
        yield server
30
31
def discover_servers(afe):
    """Look up the in-production servers that should be updated.

    Fetches the server list through the 'get_servers' RPC of the given AFE
    and keeps only the servers accepted by _filter_servers, i.e. it drops
    servers in need of repair, backup servers, and servers with roles that
    are not yet supported by deploy_server / deploy_server_local.

    @param afe: Server to contact with RPC requests.

    @returns: A set of server hostnames.
    """
    # Example of a single server entry returned by 'get_servers':
    # {
    #     'hostname': 'server1',
    #     'status': 'backup',
    #     'roles': ['drone', 'scheduler'],
    #     'attributes': {'max_processes': 300}
    # }
    all_servers = frontend.AFE(server=afe).run('get_servers')

    hostnames = set()
    for server in _filter_servers(all_servers):
        hostnames.add(server['hostname'])
    return hostnames
Alex Millerb0b2d252014-06-25 17:17:01 -070054
J. Richard Barnettef533b182014-09-04 18:24:42 -070055
def _parse_arguments(args):
    """Parse command line arguments.

    Options that the parser does not recognise are not an error; they are
    collected and returned so they can be forwarded to deploy_server_local.

    @param args: The command line arguments to parse. (usually sys.argv[1:])

    @returns A tuple of (argparse.Namespace populated with argument values,
             list of extra args to pass to deploy_server_local).
    """
    parser = argparse.ArgumentParser(
            formatter_class=argparse.RawDescriptionHelpFormatter,
            description='Run deploy_server_local on a bunch of servers. Extra '
                        'arguments will be passed through.',
            epilog=('Update all servers:\n'
                    ' deploy_server.py -x --afe cautotest\n'
                    '\n'
                    'Update one server:\n'
                    ' deploy_server.py <server> -x\n'
                    ))

    parser.add_argument('-x', action='store_true',
                        help='Actually perform actions. If not supplied, '
                             'script does nothing.')
    parser.add_argument('--afe',
                        help='The AFE server used to get servers from '
                             'server_db, e.g. cautotest. Used only if no '
                             'SERVER specified.')
    parser.add_argument('servers', action='store', nargs='*', metavar='SERVER')

    # Parse the arguments we were given rather than implicitly falling back
    # to sys.argv, so that callers (and tests) control what is parsed.
    return parser.parse_known_args(args)
J. Richard Barnettef533b182014-09-04 18:24:42 -070084
85
def _update_server(server, extra_args=None):
    """Run deploy_server_local for given server.

    @param server: hostname to update.
    @param extra_args: args to be passed in to deploy_server_local. None (the
                       default) means no extra arguments.

    @return: A tuple of (server, success, output), where:
             server: Name of the server.
             success: True if update succeeds, False otherwise.
             output: A string of the deploy_server_local script output
                     including any errors.
    """
    # Use None as the default instead of a shared mutable list.
    extra_args = [] if extra_args is None else extra_args
    cmd = '%s %s' % (DEPLOY_SERVER_LOCAL, ' '.join(extra_args))
    try:
        output = infra.execute_command(server, cmd)
    except subprocess.CalledProcessError as e:
        # A failing command is reported to the caller, not raised, so that
        # updates of other servers are unaffected.
        return server, False, e.output
    return server, True, output
108
def _update_in_parallel(servers, extra_args=None):
    """Update a group of servers in parallel.

    While updates are running, a progress message listing the servers that
    have not finished yet is printed roughly every 20 seconds.

    @param servers: A collection of server hostnames to update.
    @param extra_args: args to be passed in to deploy_server_local. None (the
                       default) means no extra arguments.

    @returns A dictionary from server names that failed to the output
             of the update script.
    """
    # Import the submodule explicitly; 'import multiprocessing' alone does not
    # guarantee that 'multiprocessing.pool' has been loaded.
    from multiprocessing.pool import ThreadPool

    # Use None as the default instead of a shared mutable list.
    extra_args = [] if extra_args is None else extra_args

    # Record every server whose update has returned. The workers are threads
    # of this process, so a plain list can be shared between them.
    finished_servers = []

    def do_server(s):
        """Update one server and mark it as finished, even on error."""
        try:
            return _update_server(s, extra_args)
        finally:
            finished_servers.append(s)

    # The update actions run in parallel. If any update failed, we should wait
    # for other running updates being finished. Abort in the middle of an update
    # may leave the server in a bad state.
    pool = ThreadPool(POOL_SIZE)
    try:
        results = pool.map_async(do_server, servers)
        pool.close()

        # Track the updating progress for current group of servers.
        while not results.ready():
            incomplete_servers = sorted(set(servers) - set(finished_servers))
            print('Not finished yet. %d servers in this group. '
                  '%d servers are still running:\n%s\n' %
                  (len(servers), len(incomplete_servers), incomplete_servers))
            # Check the progress every 20s
            results.wait(20)

        # After update finished, parse the result.
        failures = {}
        for server, success, output in results.get():
            if not success:
                failures[server] = output

        return failures

    finally:
        pool.terminate()
        pool.join()
Dan Shifb12d142015-06-09 23:30:11 -0700154
155
def main(args):
    """Entry point to deploy_server.py

    Resolves the list of servers (from the command line, or from the AFE when
    none are given), then either reports what would be done (no -x) or runs
    the update on all of them in parallel and reports any failures.

    @param args: The command line arguments to parse. (usually sys.argv)

    @returns The system exit code: 0 on success or dry run, 1 if no servers
             could be determined or any server failed to update.
    """
    options, extra_args = _parse_arguments(args[1:])
    # Remove all the handlers from the root logger to get rid of the handlers
    # introduced by the import packages.
    logging.getLogger().handlers = []
    logging.basicConfig(level=logging.DEBUG)

    servers = options.servers
    if not servers:
        if not options.afe:
            print('No servers or afe specified. Aborting')
            return 1
        print('Retrieving servers from %s..' % options.afe)
        servers = discover_servers(options.afe)
        print('Retrieved servers were: %s' % servers)

    if not options.x:
        print('Doing nothing because -x was not supplied.')
        # Report the resolved servers, which may have come from the AFE,
        # rather than only those named on the command line.
        print('servers: %s' % servers)
        print('extra args for deploy_server_local: %s' % extra_args)
        return 0

    failures = _update_in_parallel(servers, extra_args)

    if not failures:
        print('Completed all updates successfully.')
        return 0

    # Sorted so the report and the retry command are deterministic; building
    # a real list also keeps this working where dict.keys() is a view.
    failed_servers = sorted(failures)

    print('The following servers failed, with the following output:')
    for s in failed_servers:
        print('======== %s ========' % s)
        print(failures[s])

    print('The servers that failed were:')
    print('\n'.join(failed_servers))
    print('\n\nTo retry on failed servers, run the following command:')
    retry_cmd = [args[0], '-x'] + failed_servers + extra_args
    print(' '.join(retry_cmd))
    return 1
201
J. Richard Barnettef533b182014-09-04 18:24:42 -0700202
203
if __name__ == '__main__':
    # Hand main()'s return value to the shell as the process exit status.
    exit_code = main(sys.argv)
    sys.exit(exit_code)