| #!/usr/bin/python |
| # Copyright (c) 2014 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Orchestrate virtual machines to setup a toy instance of the lab for testing. |
| |
| This module is meant to help create a closed loop development flow for members |
| of the lab team which looks something like this: |
           ______________
          |              |
          |gs vm registry|<+
          |______________| |
                 |         |
                 v         |
New change -> puppylab -> New core_cluster box
                               |
            Vagrantfile specifies cluster settings
             __________________|__________________
            |                                     |
            |  puppet provisions core_cluster box |
            |_____________________________________|
                |           | ........... |
                v           v             v
              master     shard1         shardn
              |    |     |     |        |     |
            mysql afe   tko heartbt    tko heartbt
              |    |     |     |        |     |
host ports  8001 8002  8001  8002     8001  8002
      [host ports may be auto-corrected by vagrant as needed]
| |
This module can work with any vm hosting service/provider as long as it
adheres to the vagrant interface. VirtualBox is the only implementation so
far, though GCE would be an ideal candidate.
| |
Class spec:
    * VagrantProvisioner: Provision boxes per a Vagrantfile.
    * VirtualBoxProvisioner: Generate a VirtualBox Vagrantfile.
    * CoreVM: Manage individual core_cluster vms.
    * ClusterManager: Spin up a cluster.
Usage: clusterctl provision --admin-repo /usr/local/autotest/chromeos-internal
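       clusterctl update --sync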
| """ |
| |
| import argparse |
| import logging |
| import os |
| import sys |
| |
| import common |
| from autotest_lib.puppylab import lab_manifest |
| from autotest_lib.puppylab import vm_manager |
| from autotest_lib.site_utils.lib import infra |
| |
| |
| # TODO: Enable multiple shards via command line args. |
| NUM_SHARDS = 1 |
| SHADOW_PATH = '/usr/local/autotest/shadow_config.ini' |
| |
| |
| class ConfigError(Exception): |
| """Raised if one of the vms in the cluster is misconfigured.""" |
| |
| |
| class CoreVM(object): |
| """Interface to create and manage a core_cluster vm image. |
| |
| A core_cluster vm image has base packages shared by all server roles. |
| """ |
| _core_vm_name = 'chromeos_lab_core_cluster' |
| _core_image_source = 'gs://vms/%s.box' % _core_vm_name |
| _core_image_name = '%s.box' % _core_vm_name |
| _core_image_destination = os.path.join( |
| vm_manager.VAGRANT_DIR, _core_image_name) |
| |
    # TODO: Preparation is currently done by hand. To prepare a new vm, use
    # the provisioner to create a box named '_core_image_name', with the
    # CoreClusterTemplate in the VAGRANT_DIR. You can achieve this by:
    #   * Copying the CoreClusterTemplate to a Vagrantfile and replacing the
    #     modulepath with the path to your chromeos-admin/puppet directory.
    #   * Calling `vagrant up` in the directory with this Vagrantfile.
    #   * When it's done, calling `vagrant package`.
    # This should produce a package.box in the same dir.
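    # For example, a rough sketch of those steps (paths are illustrative):
    #   cd $VAGRANT_DIR
    #   cp CoreClusterTemplate Vagrantfile   # then edit modulepath
    #   vagrant up
    #   vagrant package                      # produces package.box
    #   # optionally upload it for reuse as the core image:
    #   gsutil cp package.box gs://vms/chromeos_lab_core_cluster.box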
| |
| def __init__(self, provisioner): |
| self.provisioner = provisioner |
| |
| |
| def setup_core_box(self): |
| """Setup a core cluster vm. |
| |
| Download a core_cluster image if one isn't present on disk and |
| register it with vagrant. |
| """ |
| if not os.path.exists(self._core_image_destination): |
| infra.execute_command( |
| 'localhost', 'gsutil cp %s %s' % |
| (self._core_image_source, self._core_image_destination)) |
| self.provisioner.register_box( |
| self._core_image_destination, self._core_vm_name) |
| |
| |
| def teardown_core_box(self): |
| """Teardown a core cluster vm.""" |
| # TODO: delete the box file. |
| self.provisioner.unregister_box(self._core_vm_name) |
| |
| |
| class ClusterManager(object): |
| """Interface to spin up a cluster of CoreVMs. |
| |
    This class manages all the details between creating a core_cluster image
    and running tests on a full-fledged cluster.
| """ |
| |
| def _register_shards(self, num_shards): |
| """Register num_shards worth of shard info. |
| |
| This includes the name, port address and board of the new shard. This |
| information is piped through to each vm, so the cluster manager is |
| actually in control of all the shards in the cluster and can address |
| them by name. |
| |
        Consider a shard, shard1, assigned to board stumpy:
            * You will be able to ssh into it with 'vagrant ssh stumpyshard'.
            * The afe for the shard will be running on an incrementally
              designated port starting from shards_base_port.
            * The afe port of the shard is piped through to the shadow_config.
              This is required for two reasons:
                - `cli/atest shard add` should use this name, because it is
                  the name the shard-client will use to request jobs.
                - The master afe should show links to the shard using this name.
| |
| @param num_shards: The number of shards we wish to add to the cluster. |
| """ |
| self.vagrantfile_shard_args = {} |
| self.shard_board_map = {} |
| self.vagrant_shard_names = [] |
| |
| for num in range(1, num_shards+1): |
| # The name to use for vagrant ssh |
| shard_name = 'shard%s' % num |
| # The port for the shard's afe |
| shard_port = lab_manifest.shards_base_port + num |
| # The hostname to use in the shadow_config of the shard |
| shard_hostname = '%s:%s' % (lab_manifest.vm_host_name, shard_port) |
| |
| self.vagrantfile_shard_args.update({ |
| shard_name: shard_name, |
| '%s_shadow_config_hostname' % shard_name: shard_hostname, |
| '%s_port' % shard_name: shard_port, |
| }) |
| if lab_manifest.shards: |
| board = lab_manifest.shards.pop() |
                # Assign a board to a shard. Use the shard_hostname, as this
                # setting is not meant to be human readable.
| self.shard_board_map[shard_hostname] = board |
| vagrant_shard_name = '%sshard' % board.rsplit(':')[-1] |
                # Replace the generic shard<int> name with <board>shard.
| self.vagrantfile_shard_args[shard_name] = vagrant_shard_name |
| self.vagrant_shard_names.append(vagrant_shard_name) |
| |
| |
| def __init__(self, vm_provisioner, vagrant_master_name='master', |
| num_shards=1): |
| """Initialize parameters for the cluster. |
| |
        @param vm_provisioner: A provisioner object; currently the only one
            supported is VirtualBox.
        @param vagrant_master_name: The name to give the cluster master.
        @param num_shards: The number of shards in the cluster. Each shard
            gets a name allocated based on its number (e.g. shard1).
| """ |
| self.provisioner = vm_provisioner |
| self.vm_manager = CoreVM(provisioner=self.provisioner) |
| self._register_shards(num_shards) |
| self.vagrant_master_name = vagrant_master_name |
| |
| |
| def start_cluster(self): |
| """Start a cluster.""" |
| self.vm_manager.setup_core_box() |
| |
| # TODO: Add a --rebuild-cluster option. |
| needs_destroy = self.provisioner.initialize_vagrant( |
| master=self.vagrant_master_name, |
| master_port=lab_manifest.master_afe_port, |
| **self.vagrantfile_shard_args) |
| self.provisioner.provision(needs_destroy) |
| |
| |
| def shutdown_cluster(self): |
| """Shutdown the current cluster.""" |
| # TODO: Actually destroy. Halt is useful for debugging. |
| self.provisioner.vagrant_cmd('halt') |
| |
| |
| def execute_against_vm(self, vm_name, cmd): |
| """Execute cmd against vm_name. |
| |
| @param cmd: The command to execute. |
| @param vm_name: The name of the vm, eg: stumpyshard. |
| """ |
| return self.provisioner.vagrant_cmd( |
| "ssh %s -- '%s'" % (vm_name, cmd)).rstrip('\n') |
| |
| |
    def _get_shadow_config_value(self, vm_name, key):
        """Return the shadow_config value of `key` (e.g. 'host') on a vm."""
        cmd = 'grep "^%s:" %s' % (key, SHADOW_PATH)
        shadow_value = self.execute_against_vm(vm_name, cmd)
        return shadow_value.rsplit(':')[-1].lstrip(' ')
| |
| |
| def _check_shadow_config(self, vm, key, expected_value): |
| """Sanity check the shadow_configs of all vms in the cluster. |
| |
| @raises ConfigError: If a shadow_config is misconfigured. |
| """ |
| value = self._get_shadow_config_value(vm, key) |
| if value != expected_value: |
| raise ConfigError( |
                '%s vm has a misconfigured %s = %s, expected %s' %
                (vm, key, value, expected_value))
| logging.info('%s has %s = %s', vm, key, value) |
| |
| |
| def _upstart_cmd(self, vm, job_name, cmd='status'): |
| """Execute an upstart command. |
| |
| @param vm: The name of the vm to execute it against. |
| @param job_name: The name of the upstart job. |
| @param cmd: The upstart command. |
| |
| @return: The output of the upstart command. |
| """ |
        upstart_cmd = 'sudo %s %s' % (cmd, job_name)
        try:
            return self.execute_against_vm(vm, upstart_cmd)
        except vm_manager.VagrantCmdError:
            return '%s service not found on %s' % (job_name, vm)
| |
| |
| def check_services(self, action='start'): |
| """Get the status of all core services on the vms. |
| |
| This method is designed to start srevices on the master/all |
| shards if their shadow configs are as expected. If the shadow |
| config option on a vm has an unexpected setting, services |
| are not started on it. |
| |
| @param action: The action to perform on servcies. Start will |
| start all of them, stop will stop them all. |
| |
| @raises ConfigError: If a shadow_config option is unexpected. |
| """ |
| core_services = set( |
| ['scheduler', 'host-scheduler', |
| 'gs_offloader', 'gs_offloader_s', 'shard-client']) |
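        # Pull the default gateway IP from the master vm's routing table;
        # shard vms are expected to point global_db_host at this address.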
| gateway = self.execute_against_vm( |
| self.vagrant_master_name, |
| "netstat -rn | grep \"^0.0.0.0 \" | cut -d \" \" -f10 | head -1" |
| ).rstrip('\n') |
| |
| for vm in self.vagrant_shard_names + [self.vagrant_master_name]: |
| vm_manager.format_msg('Checking services on %s' % vm) |
| self._check_shadow_config(vm, 'host', 'localhost') |
| global_db = ('localhost' if vm == self.vagrant_master_name |
| else gateway) |
| self._check_shadow_config(vm, 'global_db_host', global_db) |
| |
| for service in core_services: |
| logging.info('Checking %s on %s', service, vm) |
| status = self._upstart_cmd(vm, service, action) |
| logging.info(status) |
| |
| |
| def bringup_cluster(admin_repo, num_shards=NUM_SHARDS, start_safe=False): |
| """Start a cluster. |
| |
| @param admin_repo: Path to the chromeos-admin repo. |
    @param num_shards: Number of shards. You cannot change the number of
        shards on a running cluster; to do so you must destroy the cluster,
        remove the Vagrantfile, modify the ClusterTemplate to include a new
        section for the additional shard, and rerun clusterctl.
| @param start_safe: Start the cluster in safe mode. This means |
| all core services will be stopped. |
| """ |
| puppet_path = os.path.join(admin_repo, 'puppet') |
| if not os.path.exists(puppet_path): |
| raise ValueError('Admin repo %s does not contain puppet module' % |
| admin_repo) |
| cluster_manager = ClusterManager( |
| vm_provisioner=vm_manager.VirtualBox(puppet_path=puppet_path), |
| vagrant_master_name='master', num_shards=num_shards) |
| cluster_manager.start_cluster() |
| try: |
| cluster_manager.check_services(action='stop' if start_safe else 'start') |
| except ConfigError as e: |
| logging.error( |
| 'Shutting down cluster: %s', e) |
| cluster_manager.shutdown_cluster() |
| return 1 |
| |
| |
| def sync(): |
| """Sync autotest from the host to all vms in the cluster.""" |
| vm_manager.format_msg('Syncing Cluster') |
| vm_manager.VagrantProvisioner.vagrant_cmd('rsync', stream_output=True) |
| vm_manager.VagrantProvisioner.vagrant_cmd( |
| 'provision --provision-with shell', stream_output=True) |
| vm_manager.format_msg('Please restart services as required') |
| |
| |
| def _parse_args(args): |
| """Parse command line arguments. |
| |
    @param args: A list of command line arguments, e.g. sys.argv[1:].

    @return: The parsed args, as returned by parser.parse_args.
| """ |
| if not args: |
| print ('Too few arguments, try clusterctl --help') |
| sys.exit(1) |
| |
| description = ('A script to orchestrate a toy test lab. Provided ' |
| 'with a path to the internal repo it will download a ' |
| 'vm image and spin up a cluster against which you can ' |
| 'test core autotest changes without DUTs.') |
| parser = argparse.ArgumentParser(description=description) |
| subparsers = parser.add_subparsers() |
| provision_subparser = subparsers.add_parser( |
| 'provision', help='provision a cluster') |
| provision_subparser.required = False |
| provision_subparser.set_defaults(which='provision') |
| provision_subparser.add_argument( |
| '--admin-repo', dest='admin_repo', type=str, |
| help=('Path to the admin repo that has puppet scripts used for ' |
| 'provisioning the cluster. If you do not already have it you ' |
| 'can git clone the chromeos/chromeos-admin repo.')) |
| provision_subparser.add_argument( |
| '--safe', dest='start_safe', action='store_true', |
        help='If specified, services will not be started automatically.')
| |
| # TODO: Automate restart of services via a --restart option. |
| update_subparser = subparsers.add_parser('update', help='Update a cluster') |
| update_subparser.required = False |
| update_subparser.set_defaults(which='update') |
| update_subparser.add_argument( |
| '--sync', dest='sync', action='store_true', |
| help='Sync autotest from host to all vms in cluster.') |
| return parser.parse_args(args) |
| |
| |
| def main(args): |
| """Main function. |
| |
| @param args: command line arguments for the script. |
| """ |
| args = _parse_args(args) |
| if args.which == 'update' and args.sync: |
| sync() |
| else: |
        return bringup_cluster(
                admin_repo=args.admin_repo, start_safe=args.start_safe)
| |
| |
| if __name__ == '__main__': |
| sys.exit(main(sys.argv[1:])) |