blob: c30e881238a3b004dbb6a7cd2ca51353643b77a0 [file] [log] [blame]
Mike Frysingerd03e6b52019-08-03 12:49:01 -04001#!/usr/bin/python2
Alex Millerb0b2d252014-06-25 17:17:01 -07002
Don Garrett40036362014-12-08 15:52:44 -08003from __future__ import print_function
4
5import argparse
Shuqian Zhao9febd452017-01-31 15:36:40 -08006import logging
Shuqian Zhaofad50672017-02-02 16:46:03 -08007import multiprocessing
J. Richard Barnette868cf642014-07-21 16:34:38 -07008import subprocess
9import sys
Ningning Xia9c0bcd22018-05-01 15:40:58 -070010from multiprocessing.pool import ThreadPool
Alex Millerb0b2d252014-06-25 17:17:01 -070011
Don Garrett40036362014-12-08 15:52:44 -080012import common
Don Garrett50713462015-01-07 18:04:05 -080013from autotest_lib.server import frontend
Alex Millerb0b2d252014-06-25 17:17:01 -070014from autotest_lib.site_utils.lib import infra
15
# Command used to invoke the per-server deploy script; _update_server appends
# any extra arguments to it and hands it to infra.execute_command.
DEPLOY_SERVER_LOCAL = ('/usr/local/autotest/site_utils/deploy_server_local.py')
# Number of worker threads in the pool used by _update_in_parallel.
POOL_SIZE = 124
Dan Shifb12d142015-06-09 23:30:11 -070018
Alex Millerb0b2d252014-06-25 17:17:01 -070019
Aviv Keshet92281f72017-10-24 16:01:10 -070020def _filter_servers(servers):
21 """Filter a set of servers to those that should be deployed to."""
22 non_push_roles = {'devserver', 'crash_server', 'reserve'}
23 for s in servers:
24 if s['status'] == 'repair_required':
25 continue
Aviv Keshet9ec24b52017-10-31 11:02:10 -070026 if s['status'] == 'backup':
27 continue
Aviv Keshet92281f72017-10-24 16:01:10 -070028 if set(s['roles']) & non_push_roles:
29 continue
30 yield s
31
32
def discover_servers(afe):
    """Look up the in-production servers that should receive an update.

    Fetches the server list through the 'get_servers' RPC of the given AFE
    and keeps only the entries accepted by _filter_servers, i.e. it leaves
    out servers in need of repair, backup servers, and servers with roles
    that are not pushed to.

    @param afe: Server to contact with RPC requests.

    @returns: A set of server hostnames.
    """
    # Each entry returned by 'get_servers' looks like:
    # {
    #     'hostname': 'server1',
    #     'status': 'backup',
    #     'roles': ['drone', 'scheduler'],
    #     'attributes': {'max_processes': 300}
    # }
    all_servers = frontend.AFE(server=afe).run('get_servers')

    hostnames = set()
    for server in _filter_servers(all_servers):
        hostnames.add(server['hostname'])
    return hostnames
Alex Millerb0b2d252014-06-25 17:17:01 -070055
J. Richard Barnettef533b182014-09-04 18:24:42 -070056
Aviv Keshet92281f72017-10-24 16:01:10 -070057def _parse_arguments(args):
Don Garrett40036362014-12-08 15:52:44 -080058 """Parse command line arguments.
59
60 @param args: The command line arguments to parse. (usually sys.argv[1:])
61
Aviv Keshet92281f72017-10-24 16:01:10 -070062 @returns A tuple of (argparse.Namespace populated with argument values,
63 list of extra args to pass to deploy_server_local).
Don Garrett40036362014-12-08 15:52:44 -080064 """
65 parser = argparse.ArgumentParser(
Don Garrett3f2b6602014-12-16 18:19:16 -080066 formatter_class=argparse.RawDescriptionHelpFormatter,
Aviv Keshet92281f72017-10-24 16:01:10 -070067 description='Run deploy_server_local on a bunch of servers. Extra '
68 'arguments will be passed through.',
Don Garrett3f2b6602014-12-16 18:19:16 -080069 epilog=('Update all servers:\n'
Aviv Keshet92281f72017-10-24 16:01:10 -070070 ' deploy_server.py -x --afe cautotest\n'
Don Garrett3f2b6602014-12-16 18:19:16 -080071 '\n'
72 'Update one server:\n'
Aviv Keshet92281f72017-10-24 16:01:10 -070073 ' deploy_server.py <server> -x\n'
74 ))
Don Garrett3f2b6602014-12-16 18:19:16 -080075
Aviv Keshet92281f72017-10-24 16:01:10 -070076 parser.add_argument('-x', action='store_true',
77 help='Actually perform actions. If not supplied, '
78 'script does nothing.')
79 parser.add_argument('--afe',
Shuqian Zhao6cf933b2017-09-27 15:07:56 -070080 help='The AFE server used to get servers from server_db,'
Aviv Keshet92281f72017-10-24 16:01:10 -070081 'e.g, cautotest. Used only if no SERVER specified.')
82 parser.add_argument('servers', action='store', nargs='*', metavar='SERVER')
Don Garrett40036362014-12-08 15:52:44 -080083
Aviv Keshet92281f72017-10-24 16:01:10 -070084 return parser.parse_known_args()
J. Richard Barnettef533b182014-09-04 18:24:42 -070085
86
def _update_server(server, extra_args=None):
    """Run deploy_server_local for given server.

    @param server: hostname to update.
    @param extra_args: Iterable of string args to be passed in to
                       deploy_server_local, or None for no extra args.

    @return: A tuple of (server, success, output), where:
             server: Name of the server.
             success: True if update succeeds, False otherwise.
             output: Whatever infra.execute_command returned on success, or
                     the output attached to the CalledProcessError on
                     failure.
    """
    # None (rather than a shared mutable list) is the "no extra args" default.
    cmd = '%s %s' % (DEPLOY_SERVER_LOCAL, ' '.join(extra_args or ()))
    try:
        output = infra.execute_command(server, cmd)
    except subprocess.CalledProcessError as e:
        # A failing command is reported to the caller, not raised.
        return server, False, e.output
    return server, True, output
109
def _update_in_parallel(servers, extra_args=None):
    """Update a group of servers in parallel.

    While the updates are running, a progress message listing the servers
    that have not finished yet is printed roughly every 20 seconds.

    @param servers: A collection of server hostnames to update.
    @param extra_args: List of args passed through to deploy_server_local for
                       every server, or None for no extra args.

    @returns A dictionary from server names that failed to the output
             of the update script.
    """
    extra_args = list(extra_args or [])

    # Hostnames whose update call has returned, successfully or not. The
    # workers are threads of this process, so a plain list is visible to all
    # of them; no multiprocessing.Manager is needed.
    finished_servers = []

    def do_server(s):
        """Update one server and record that it is no longer running."""
        try:
            return _update_server(s, extra_args)
        finally:
            finished_servers.append(s)

    # The update actions run in parallel. If any update failed, we should wait
    # for other running updates being finished. Abort in the middle of an update
    # may leave the server in a bad state.
    pool = ThreadPool(POOL_SIZE)
    try:
        results = pool.map_async(do_server, servers)
        pool.close()

        # Track the updating progress for current group of servers.
        while not results.ready():
            incomplete_servers = sorted(set(servers) - set(finished_servers))
            print('Not finished yet. %d servers in this group. '
                  '%d servers are still running:\n%s\n' %
                  (len(servers), len(incomplete_servers), incomplete_servers))
            # Check the progress every 20s
            results.wait(20)

        # After update finished, keep only the servers that failed.
        return {server: output
                for server, success, output in results.get()
                if not success}
    finally:
        pool.terminate()
        pool.join()
Dan Shifb12d142015-06-09 23:30:11 -0700155
156
def main(args):
    """Entry point to deploy_server.py

    Determines the servers to update (those named on the command line, or
    else the ones discovered through --afe), and updates them in parallel
    when -x was supplied. Without -x it only prints what would be done.

    @param args: The command line arguments to parse. (usually sys.argv)

    @returns The system exit code: 0 on success or dry run, 1 if no servers
             could be determined or any server failed to update.
    """
    options, extra_args = _parse_arguments(args[1:])
    # Remove all the handlers from the root logger to get rid of the handlers
    # introduced by the import packages.
    logging.getLogger().handlers = []
    logging.basicConfig(level=logging.DEBUG)

    servers = options.servers
    if not servers:
        if not options.afe:
            print('No servers or afe specified. Aborting')
            return 1
        print('Retrieving servers from %s..' % options.afe)
        servers = discover_servers(options.afe)
        print('Retrieved servers were: %s' % servers)

    if not options.x:
        print('Doing nothing because -x was not supplied.')
        # Report the resolved servers, not options.servers, which is empty
        # whenever the servers were discovered through --afe.
        print('servers: %s' % servers)
        print('extra args for deploy_server_local: %s' % extra_args)
        return 0

    failures = _update_in_parallel(servers, extra_args)

    if not failures:
        print('Completed all updates successfully.')
        return 0

    # Sorted so the report and the retry command are stable between runs.
    failed_servers = sorted(failures)

    print('The following servers failed, with the following output:')
    for s in failed_servers:
        print('======== %s ========' % s)
        print(failures[s])

    print('The servers that failed were:')
    print('\n'.join(failed_servers))
    print('\n\nTo retry on failed servers, run the following command:')
    retry_cmd = [args[0], '-x'] + failed_servers + extra_args
    print(' '.join(retry_cmd))
    return 1
202
J. Richard Barnettef533b182014-09-04 18:24:42 -0700203
204
# Run main() only when executed as a script; its return value is used as the
# process exit code.
if __name__ == '__main__':
    sys.exit(main(sys.argv))