Dan Shi | 56f1ba7 | 2014-12-03 19:16:53 -0800 | [diff] [blame] | 1 | # Copyright 2014 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
"""This module provides utility functions to help manage servers in the server
database (defined in global config section AUTOTEST_SERVER_DB).

After a role is added to or removed from a server, certain services may need
to be restarted. For example, the scheduler needs to be restarted after a drone
is added to a primary server. This module includes functions to check whether
actions are required and which actions to execute on which servers.
"""
| 13 | |
| 14 | import subprocess |
| 15 | import sys |
| 16 | |
| 17 | import common |
| 18 | |
| 19 | from autotest_lib.frontend.server import models as server_models |
| 20 | from autotest_lib.site_utils import server_manager_utils |
| 21 | from autotest_lib.site_utils.lib import infra |
| 22 | |
| 23 | |
# Actions that must be executed for a server management change to take effect.
# Each action is a 2-tuple:
#   (role of the servers on which the command is run, the command itself)
RESTART_SCHEDULER = (
    server_models.ServerRole.ROLE.SCHEDULER,
    'sudo service scheduler restart',
)
RESTART_HOST_SCHEDULER = (
    server_models.ServerRole.ROLE.HOST_SCHEDULER,
    'sudo service host-scheduler restart',
)
RESTART_SUITE_SCHEDULER = (
    server_models.ServerRole.ROLE.SUITE_SCHEDULER,
    'sudo service suite_scheduler restart',
)
# Note: the apache reload is keyed on the SCHEDULER role.
RELOAD_APACHE = (
    server_models.ServerRole.ROLE.SCHEDULER,
    'sudo service apache reload',
)

STOP_SCHEDULER = (
    server_models.ServerRole.ROLE.SCHEDULER,
    'sudo service scheduler stop',
)
STOP_HOST_SCHEDULER = (
    server_models.ServerRole.ROLE.HOST_SCHEDULER,
    'sudo service host-scheduler stop',
)
STOP_SUITE_SCHEDULER = (
    server_models.ServerRole.ROLE.SUITE_SCHEDULER,
    'sudo service suite_scheduler stop',
)
| 42 | |
# Actions needed for a role to become effective once it is enabled. Maps each
# role to the list of actions to apply after that role is added to a server,
# or after the server's status is changed to primary.
ACTIONS_AFTER_ROLE_APPLIED = {
    server_models.ServerRole.ROLE.SCHEDULER: [
        RESTART_SCHEDULER,
    ],
    server_models.ServerRole.ROLE.HOST_SCHEDULER: [
        RESTART_HOST_SCHEDULER,
    ],
    server_models.ServerRole.ROLE.SUITE_SCHEDULER: [
        RESTART_SUITE_SCHEDULER,
    ],
    server_models.ServerRole.ROLE.DRONE: [
        RESTART_SCHEDULER,
    ],
    server_models.ServerRole.ROLE.DATABASE: [
        RESTART_SCHEDULER,
        RESTART_HOST_SCHEDULER,
        RELOAD_APACHE,
    ],
    server_models.ServerRole.ROLE.DEVSERVER: [
        RESTART_SCHEDULER,
    ],
}
| 56 | |
# Actions needed for a role to be disabled. Maps each role to a list of
# actions.
# These actions should be taken before the role is deleted from a server, or
# before the server's status is changed from primary to another status.
ACTIONS_BEFORE_ROLE_REMOVED = {
    server_models.ServerRole.ROLE.SCHEDULER: [
        STOP_SCHEDULER,
    ],
    server_models.ServerRole.ROLE.HOST_SCHEDULER: [
        STOP_HOST_SCHEDULER,
    ],
    server_models.ServerRole.ROLE.SUITE_SCHEDULER: [
        STOP_SUITE_SCHEDULER,
    ],
    server_models.ServerRole.ROLE.DATABASE: [
        STOP_SCHEDULER,
        STOP_HOST_SCHEDULER,
    ],
}
# These actions should be taken after the role is deleted from a server, or
# after the server's status is changed from primary to another status.
ACTIONS_AFTER_ROLE_REMOVED = {
    server_models.ServerRole.ROLE.DRONE: [
        RESTART_SCHEDULER,
    ],
    server_models.ServerRole.ROLE.DEVSERVER: [
        RESTART_SCHEDULER,
    ],
}
| 74 | |
| 75 | |
def apply(action):
    """Apply a given action.

    It usually involves ssh to the servers with the specific role and running
    the command there, e.g., ssh to the scheduler server and restart the
    scheduler. The command is run on every server returned by the lookup for
    the role in primary status.

    A command failure on one server is reported on stderr and does not stop
    the remaining servers from being processed; subprocess.CalledProcessError
    is caught here and is not propagated to the caller.

    @param action: A tuple of (the role of the servers on which the command
                   should be executed, the command).
    @raise ServerActionError: If the action can't be applied due to a database
                              issue (presumably propagated from
                              server_manager_utils.get_servers - TODO confirm).
    """
    role = action[0]
    command = action[1]
    # Look up the servers that carry the role and are in primary status.
    servers = server_manager_utils.get_servers(
            role=role, status=server_models.Server.STATUS.PRIMARY)
    if not servers:
        # sys.stderr.write is used instead of the print statement so that the
        # function behaves the same under Python 2 and Python 3.
        sys.stderr.write(
                'WARNING! Action %s failed to be applied. No server with '
                'given role %s was found.\n' % (action, role))
        return

    for server in servers:
        sys.stdout.write('Run command `%s` on server %s\n' %
                         (command, server.hostname))
        try:
            infra.execute_command(server.hostname, command)
        except subprocess.CalledProcessError as e:
            # Best effort: report the failure and keep going so that one bad
            # server does not block the others.
            sys.stderr.write(
                    'Failed to run command `%s` on server %s, error: %s\n' %
                    (command, server.hostname, e))
| 107 | |
| 108 | |
def try_execute(server, roles, enable, post_change,
                prev_status=server_models.Server.STATUS.BACKUP,
                do_action=False):
    """Try to execute actions for given role changes of the server.

    Nothing is done when the server database is not in use, or when the server
    neither is nor was in primary status. Each required action is applied at
    most once, in the order in which it is first required by `roles`.

    @param server: Server that has the role changes.
    @param roles: A list of roles changed.
    @param enable: Set to True if the roles are enabled, i.e., added to server.
                   If it's False, the roles are removed from the server.
    @param post_change: Set to True if the actions should be applied after the
                        role changes, otherwise, set to False.
    @param prev_status: The status the server had before the status change, if
                        any. This is to help to decide if actions should be
                        executed, since actions should be applied if the
                        server's status is changed from primary to other
                        status. Default to backup.
    @param do_action: Set to True to execute actions, otherwise, post a warning.
    """
    if not server_manager_utils.use_server_db():
        return
    # This check is to prevent actions to be applied to server not in primary
    # role or server database is not enabled. Note that no action is needed
    # before a server is changed to primary status. If that assumption is
    # no longer valid, this method needs to be updated accordingly.
    if (server.status != server_models.Server.STATUS.PRIMARY and
        prev_status != server_models.Server.STATUS.PRIMARY):
        return

    if enable:
        # No action is defined for the time before a role is enabled; use an
        # empty mapping so that case is a no-op instead of hitting an unbound
        # local variable.
        possible_actions = ACTIONS_AFTER_ROLE_APPLIED if post_change else {}
    elif post_change:
        possible_actions = ACTIONS_AFTER_ROLE_REMOVED
    else:
        possible_actions = ACTIONS_BEFORE_ROLE_REMOVED

    # De-duplicate while keeping declaration order, so that actions run in a
    # deterministic sequence (a set would iterate in arbitrary order).
    pending_actions = []
    for role in roles:
        for action in possible_actions.get(role, []):
            if action not in pending_actions:
                pending_actions.append(action)

    for action in pending_actions:
        if do_action:
            apply(action)
        else:
            message = ('WARNING! Action %s is skipped. Please manually '
                       'execute the action to make your change effective.' %
                       str(action))
            # sys.stderr.write works under both Python 2 and Python 3.
            sys.stderr.write(message + '\n')