| # Copyright 2017 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import logging |
| import os |
| import socket |
| import time |
| |
| import common |
| |
| from autotest_lib.client.bin import utils |
| from autotest_lib.client.common_lib import error |
| from autotest_lib.client.common_lib.global_config import global_config |
| from autotest_lib.site_utils.lxc import config as lxc_config |
| from autotest_lib.site_utils.lxc import constants |
| from autotest_lib.site_utils.lxc import container_pool |
| from autotest_lib.site_utils.lxc import lxc |
| from autotest_lib.site_utils.lxc.cleanup_if_fail import cleanup_if_fail |
| from autotest_lib.site_utils.lxc.base_image import BaseImage |
| from autotest_lib.site_utils.lxc.constants import \ |
| CONTAINER_POOL_METRICS_PREFIX as METRICS_PREFIX |
| from autotest_lib.site_utils.lxc.container import Container |
| from autotest_lib.site_utils.lxc.container_factory import ContainerFactory |
| |
| try: |
| from chromite.lib import metrics |
| from infra_libs import ts_mon |
| except ImportError: |
| import mock |
| metrics = utils.metrics_mock |
| ts_mon = mock.Mock() |
| |
| |
| # Timeout (in seconds) for container pool operations. |
| _CONTAINER_POOL_TIMEOUT = 3 |
| |
| _USE_LXC_POOL = global_config.get_config_value('LXC_POOL', 'use_lxc_pool', |
| type=bool) |
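
# For reference, a hedged sketch of the shadow_config.ini entry this flag
# reads (section/key inferred from the get_config_value() call above; the
# value shown is illustrative only):
#
#   [LXC_POOL]
#   use_lxc_pool: True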
| |
| class ContainerBucket(object): |
| """A wrapper class to interact with containers in a specific container path. |
| """ |
| |
| def __init__(self, container_path=constants.DEFAULT_CONTAINER_PATH, |
| container_factory=None): |
| """Initialize a ContainerBucket. |
| |
| @param container_path: Path to the directory used to store containers. |
| Default is set to AUTOSERV/container_path in |
| global config. |
| @param container_factory: A factory for creating Containers. |
| """ |
| self.container_path = os.path.realpath(container_path) |
| if container_factory is not None: |
| self._factory = container_factory |
| else: |
| # Pick the correct factory class to use (pool-based, or regular) |
| # based on the config variable. |
| factory_class = ContainerFactory |
| if _USE_LXC_POOL: |
| logging.debug('Using container pool') |
| factory_class = _PoolBasedFactory |
| |
| # Pass in the container path so that the bucket is hermetic (i.e. so |
| # that if the container path is customized, the base image doesn't |
| # fall back to using the default container path). |
| try: |
| base_image_ok = True |
| container = BaseImage(self.container_path).get() |
            except error.ContainerError:
                base_image_ok = False
                raise
| finally: |
| metrics.Counter(METRICS_PREFIX + '/base_image', |
| field_spec=[ts_mon.BooleanField('corrupted')] |
| ).increment( |
| fields={'corrupted': not base_image_ok}) |
| self._factory = factory_class( |
| base_container=container, |
| lxc_path=self.container_path) |
| self.container_cache = {} |
| |
| |
| def get_all(self, force_update=False): |
| """Get details of all containers. |
| |
| Retrieves all containers owned by the bucket. Note that this doesn't |
| include the base container, or any containers owned by the container |
| pool. |
| |
| @param force_update: Boolean, ignore cached values if set. |
| |
| @return: A dictionary of all containers with detailed attributes, |
| indexed by container name. |
| """ |
| info_collection = lxc.get_container_info(self.container_path) |
| containers = {} if force_update else self.container_cache |
| for info in info_collection: |
| if info["name"] in containers: |
| continue |
| container = Container.create_from_existing_dir(self.container_path, |
| **info) |
            # Active containers have an ID. Zygotes and base containers don't.
| if container.id is not None: |
| containers[container.id] = container |
| self.container_cache = containers |
| return containers |
| |
| |
| def get_container(self, container_id): |
| """Get a container with matching name. |
| |
| @param container_id: ID of the container. |
| |
| @return: A container object with matching name. Returns None if no |
| container matches the given name. |
| """ |
| if container_id in self.container_cache: |
| return self.container_cache[container_id] |
| |
| return self.get_all().get(container_id, None) |
| |
| |
| def exist(self, container_id): |
| """Check if a container exists with the given name. |
| |
| @param container_id: ID of the container. |
| |
| @return: True if the container with the given ID exists, otherwise |
| returns False. |
| """ |
| return self.get_container(container_id) != None |
| |
| |
| def destroy_all(self): |
| """Destroy all containers, base must be destroyed at the last. |
| """ |
| containers = self.get_all().values() |
| for container in sorted( |
| containers, key=lambda n: 1 if n.name == constants.BASE else 0): |
| key = container.id |
| logging.info('Destroy container %s.', container.name) |
| container.destroy() |
| del self.container_cache[key] |
| |
| |
| |
| @metrics.SecondsTimerDecorator( |
| '%s/setup_test_duration' % constants.STATS_KEY) |
| @cleanup_if_fail() |
| def setup_test(self, container_id, job_id, server_package_url, result_path, |
| control=None, skip_cleanup=False, job_folder=None, |
| dut_name=None, isolate_hash=None): |
| """Setup test container for the test job to run. |
| |
| The setup includes: |
        1. Install the autotest_server package from the given URL.
        2. Copy over the local shadow_config.ini.
        3. Mount local site-packages.
        4. Mount the test result directory.
| |
| TODO(dshi): Setup also needs to include test control file for autoserv |
| to run in container. |
| |
| @param container_id: ID to assign to the test container. |
| @param job_id: Job id for the test job to run in the test container. |
| @param server_package_url: Url to download autotest_server package. |
| @param result_path: Directory to be mounted to container to store test |
| results. |
| @param control: Path to the control file to run the test job. Default is |
| set to None. |
| @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot |
| container failures. |
| @param job_folder: Folder name of the job, e.g., 123-debug_user. |
        @param dut_name: Name of the DUT the test runs on, used as the
                         hostname of the container. Default is None.
        @param isolate_hash: String key to look up the isolate package needed
                             to run the test. Default is None. If present, it
                             supersedes server_package_url.
        @return: A Container object for the test container.

        @raise ContainerError: If the container does not exist or is not
                               running.
| """ |
| |
| if not os.path.exists(result_path): |
            raise error.ContainerError('Result directory does not exist: %s' %
                                       result_path)
| result_path = os.path.abspath(result_path) |
| |
        # Save the control file to result_path temporarily, because the
        # control file in the drone_tmp folder can be deleted during a
        # scheduler restart. For tests not using SSP, the window between the
        # test starting and the control file being picked up is very small
        # (< 2 seconds). However, for tests using SSP, it takes around a
        # minute to set up the container. If the scheduler is restarted during
        # that period, the control file will be deleted and the test will
        # fail.
| if control: |
| control_file_name = os.path.basename(control) |
| safe_control = os.path.join(result_path, control_file_name) |
| utils.run('cp %s %s' % (control, safe_control)) |
| |
| # Create test container from the base container. |
| container = self._factory.create_container(container_id) |
| |
| # Deploy server side package |
| if isolate_hash: |
| container.install_ssp_isolate(isolate_hash) |
| else: |
| container.install_ssp(server_package_url) |
| |
| deploy_config_manager = lxc_config.DeployConfigManager(container) |
| deploy_config_manager.deploy_pre_start() |
| |
| # Copy over control file to run the test job. |
| if control: |
| container.install_control_file(safe_control) |
| |
| mount_entries = [(constants.SITE_PACKAGES_PATH, |
| constants.CONTAINER_SITE_PACKAGES_PATH, |
| True), |
                         (result_path,
                          constants.RESULT_DIR_FMT % job_folder,
                          False),
| ] |
| |
| # Update container config to mount directories. |
| for source, destination, readonly in mount_entries: |
| container.mount_dir(source, destination, readonly) |
| |
| # Update file permissions. |
        # TODO(dshi): crbug.com/459344 Skip the following step once the test
        # container can run as an unprivileged container.
| autotest_path = os.path.join( |
| container.rootfs, |
| constants.CONTAINER_AUTOTEST_DIR.lstrip(os.path.sep)) |
| utils.run('sudo chown -R root "%s"' % autotest_path) |
| utils.run('sudo chgrp -R root "%s"' % autotest_path) |
| |
| container.start(wait_for_network=True) |
| deploy_config_manager.deploy_post_start() |
| |
        # Update the hostname of the test container to be `dut_name`.
        # Some TradeFed tests use the hostname in test results, which is used
        # to group test results in the dashboard. The default container name
        # is the name of the folder, which is unique (as it is composed of the
        # job id and a timestamp). For a better results view, the container's
        # hostname is set to a string containing the DUT hostname.
| if dut_name: |
| container.set_hostname(constants.CONTAINER_UTSNAME_FORMAT % |
| dut_name.replace('.', '-')) |
| |
| container.modify_import_order() |
| |
| container.verify_autotest_setup(job_folder) |
| |
| logging.debug('Test container %s is set up.', container.name) |
| return container |
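
    # A hedged usage sketch of the setup flow above (all identifiers, URLs and
    # paths below are illustrative placeholders, not values defined in this
    # module):
    #
    #   bucket = ContainerBucket()
    #   test_container = bucket.setup_test(
    #           container_id=some_container_id,
    #           job_id='123',
    #           server_package_url='http://.../autotest_server_package.tar.bz2',
    #           result_path='/usr/local/autotest/results/123-debug_user',
    #           control='/path/to/control',
    #           job_folder='123-debug_user',
    #           dut_name='chromeos1-row1-rack1-host1')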
| |
| |
| class _PoolBasedFactory(ContainerFactory): |
| """A ContainerFactory that queries the running container pool. |
| |
| Implementation falls back to the regular container factory behaviour |
    (i.e. locally cloning a container) if the pool is unavailable or if it does
    not return a container before the specified timeout.
| """ |
| |
| def __init__(self, *args, **kwargs): |
| super(_PoolBasedFactory, self).__init__(*args, **kwargs) |
| try: |
| self._client = container_pool.Client() |
        except (socket.error, socket.timeout):
| # If an error occurs connecting to the container pool, fall back to |
| # the default container factory. |
| logging.exception('Container pool connection failed.') |
| self._client = None |
| |
| |
| def create_container(self, new_id): |
| """Creates a new container. |
| |
| Attempts to retrieve a container from the container pool. If that |
| operation fails, this falls back to the parent class behaviour. |
| |
| @param new_id: ContainerId to assign to the new container. Containers |
| must be assigned an ID before they can be released from |
| the container pool. |
| |
| @return: The new container. |
| """ |
| container = None |
| if self._client: |
| try: |
| container = self._client.get_container(new_id, |
| _CONTAINER_POOL_TIMEOUT) |
| except Exception: |
| logging.exception('Error communicating with container pool.') |
            else:
                if container is not None:
                    logging.debug('Retrieved container from pool: %s',
                                  container.name)
        # Report whether the pool served this request, then return the pooled
        # container if one was obtained.
        metrics.Counter(METRICS_PREFIX + '/containers_served',
                        field_spec=[ts_mon.BooleanField('from_pool')]
                        ).increment(fields={
                            'from_pool': (container is not None)})
| if container is not None: |
| return container |
| |
| # If the container pool did not yield a container, make one locally. |
| logging.warning('Unable to obtain container from pre-populated pool. ' |
| 'Creating container locally. This slows server tests ' |
| 'down and should be debugged even if local creation ' |
| 'works out.') |
| return super(_PoolBasedFactory, self).create_container(new_id) |