| #!/usr/bin/python -u |
| # Copyright 2007-2008 Martin J. Bligh <mbligh@google.com>, Google Inc. |
| # Released under the GPL v2 |
| |
| """ |
| Run a control file through the server side engine |
| """ |
| |
| import ast |
| import datetime |
| import getpass |
| import logging |
| import os |
| import re |
| import signal |
| import socket |
| import sys |
| import traceback |
| import time |
| import urllib2 |
| |
| import common |
| |
| from autotest_lib.client.common_lib import control_data |
| from autotest_lib.client.common_lib import global_config |
| try: |
| from autotest_lib.puppylab import results_mocker |
| except ImportError: |
| results_mocker = None |
| |
# Whether the atfork module is required, read from the
# 'require_atfork_module' key of the AUTOSERV config section. Defaults to True
# when the key is not set.
# NOTE(review): this name is not referenced again in the visible source; the
# ImportError fallback of the atfork import re-reads the same key with
# default=False -- confirm which default is intended.
require_atfork = global_config.global_config.get_config_value(
        'AUTOSERV', 'require_atfork_module', type=bool, default=True)


# Number of seconds to wait before returning if testing mode is enabled
# NOTE(review): not referenced in the visible source -- confirm it is still
# needed.
TESTING_MODE_SLEEP_SECS = 1
| |
# Install atfork so that forking is safe in the presence of threads. A missing
# atfork module is only fatal when the AUTOSERV config asks for it.
try:
    import atfork
    atfork.monkeypatch_os_fork_functions()
    import atfork.stdlib_fixer
    # Fix the Python standard library for threading+fork safety with its
    # internal locks.  http://code.google.com/p/python-atfork/
    import warnings
    # logging is already imported at the top of this file; ignore the warning
    # whose message starts with 'logging module already imported'.
    warnings.filterwarnings('ignore', 'logging module already imported')
    atfork.stdlib_fixer.fix_logging_module()
except ImportError as e:
    from autotest_lib.client.common_lib import global_config
    # NOTE(review): the default used here (False) differs from the default
    # used for the module-level require_atfork value (True) -- confirm which
    # one is intended before unifying them.
    if global_config.global_config.get_config_value(
            'AUTOSERV', 'require_atfork_module', type=bool, default=False):
        # Both the hint and the import error are diagnostics: keep them
        # together on stderr.
        sys.stderr.write('Please run utils/build_externals.py\n')
        sys.stderr.write(str(e) + '\n')
        sys.exit(1)
| |
| from autotest_lib.server import frontend |
| from autotest_lib.server import server_logging_config |
| from autotest_lib.server import server_job, utils, autoserv_parser, autotest |
| from autotest_lib.server import utils as server_utils |
| from autotest_lib.site_utils import job_directories |
| from autotest_lib.site_utils import job_overhead |
| from autotest_lib.site_utils import lxc |
| from autotest_lib.client.common_lib import pidfile, logging_manager |
| from autotest_lib.client.common_lib.cros.graphite import autotest_stats |
| |
# Control segment to stage server-side package.
# The path is looked up once, at import time, through
# server_job._control_segment_path.
STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
        'stage_server_side_package')
| |
def log_alarm(signum, frame):
    """Signal handler for SIGALRM: log the event and abort autoserv.

    @param signum: Number of the received signal (unused).
    @param frame: Stack frame that was interrupted (unused).

    @raises SystemExit: Always, with exit code 1.
    """
    # The handler terminates the process, so the message must say so instead
    # of claiming the signal is ignored.
    logging.error("Received SIGALRM. Aborting autoserv.")
    sys.exit(1)
| |
| |
| def _get_machines(parser): |
| """Get a list of machine names from command line arg -m or a file. |
| |
| @param parser: Parser for the command line arguments. |
| |
| @return: A list of machine names from command line arg -m or the |
| machines file specified in the command line arg -M. |
| """ |
| if parser.options.machines: |
| machines = parser.options.machines.replace(',', ' ').strip().split() |
| else: |
| machines = [] |
| machines_file = parser.options.machines_file |
| if machines_file: |
| machines = [] |
| for m in open(machines_file, 'r').readlines(): |
| # remove comments, spaces |
| m = re.sub('#.*', '', m).strip() |
| if m: |
| machines.append(m) |
| logging.debug('Read list of machines from file: %s', machines_file) |
| logging.debug('Machines: %s', ','.join(machines)) |
| |
| if machines: |
| for machine in machines: |
| if not machine or re.search('\s', machine): |
| parser.parser.error("Invalid machine: %s" % str(machine)) |
| machines = list(set(machines)) |
| machines.sort() |
| return machines |
| |
| |
def _stage_ssp(parser):
    """Stage the server-side package by executing its control segment.

    The control segment is executed with the machines and the image from the
    autoserv command line, and is expected to assign `ssp_url`. How the
    package is staged could differ per host type; currently only CrosHost has
    a stage_server_side_package function defined.
    The segment yields None when no server-side package is available. It may
    still raise an exception when staging fails for a reason other than the
    artifact (the server-side package) not being found.

    @param parser: Command line arguments parser passed in the autoserv process.

    @return: url of the staged server-side package. Return None if server-
             side package is not found for the build.
    """
    segment_globals = {
        'machines': _get_machines(parser),
        'image': parser.options.image,
    }
    segment_locals = {}
    execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, segment_globals,
             segment_locals)
    return segment_locals['ssp_url']
| |
| |
def _run_with_ssp(container_name, job_id, results, parser, ssp_url):
    """Run the server job with server-side packaging.

    A test container is set up, the current autoserv command line (sys.argv)
    is rewritten so its paths point into the container, and the rewritten
    command is run in the container. The container is destroyed afterwards,
    whether or not the command succeeded.

    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.

    @raises ValueError: If '--require-ssp' is not literally present in
                        sys.argv.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
               else None)
    test_container = bucket.setup_test(container_name, job_id, ssp_url, results,
                                       control=control)
    # From here on the container exists: keep everything under try/finally so
    # a failure while building the command line cannot leak the container.
    try:
        args = sys.argv[:]
        args.remove('--require-ssp')

        # A dictionary of paths to replace in the command line. Key is the
        # path to be replaced with the one in value.
        paths_to_replace = {}
        # Replace the control file path with the one in container.
        if control:
            container_control_filename = os.path.join(
                    lxc.CONTROL_TEMP_PATH, os.path.basename(control))
            paths_to_replace[control] = container_control_filename
        # Update result directory with the one in container.
        if parser.options.results:
            container_result_dir = lxc.RESULT_DIR_FMT % job_id
            paths_to_replace[parser.options.results] = container_result_dir
        # Update parse_job directory with the one in container. The assumption
        # is that the result folder to be parsed is always the same as the
        # results_dir. The path is computed here on its own so that --parse-job
        # also works when no results directory was given.
        if parser.options.parse_job:
            container_parse_dir = lxc.RESULT_DIR_FMT % job_id
            paths_to_replace[parser.options.parse_job] = container_parse_dir

        args = [paths_to_replace.get(arg, arg) for arg in args]

        # Apply --use-existing-results, results directory is already created
        # and mounted in container. Apply this arg to avoid exception being
        # raised.
        if '--use-existing-results' not in args:
            args.append('--use-existing-results')

        # Make sure autoserv running in container using a different pid file.
        if '--pidfile-label' not in args:
            args.extend(['--pidfile-label', 'container_autoserv'])

        # NOTE(review): arguments are joined without any shell quoting, so an
        # argument containing whitespace would be split -- confirm how
        # attach_run interprets this string.
        cmd_line = ' '.join(args)
        logging.info('Run command in container: %s', cmd_line)
        test_container.attach_run(cmd_line)
    finally:
        test_container.destroy()
| |
| |
def run_autoserv(pid_file_manager, results, parser, ssp_url):
    """Run server job with given options.

    Detaches stdin, moves the process into its own process group, installs
    the signal handlers, builds the server job from the command line options
    and runs either the requested special task (repair, verify, provision,
    reset, cleanup) or the control file. This function does not return: it
    always ends with sys.exit().

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.

    @raises SystemExit: Always. The exit code is 0 if the job ran without
                        raising, 1 otherwise.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log and force to not use server-side packaging.
        parser.options.require_ssp = False
        logging.warning(
                'Autoserv is required to run with server-side packaging. '
                'However, no drone is found to support server-side '
                'packaging. The test will be executed in a drone without '
                'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group
    os.setpgrp()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time()))

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')
        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if parser.options.require_ssp and ssp_url:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_name)
            except:
                # Deliberately catch everything: no failure here may prevent
                # the process group from being killed below.
                logging.error('Failed to destroy container %s. Error: %s',
                              container_name, sys.exc_info())

        # Kill the whole process group created by os.setpgrp() above,
        # including this process.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not available to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        pass

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALRM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    if not execution_tag:
        execution_tag = parse_job
    host_protection = parser.options.host_protection
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error("Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    if control_filename:
        kwargs['control_filename'] = control_filename
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    try:
        try:
            if repair:
                job.repair(host_protection, job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                # Server-side packaging will only be used if it's required and
                # the package is available.
                if parser.options.require_ssp and ssp_url:
                    try:
                        _run_with_ssp(container_name, job_or_task_id, results,
                                      parser, ssp_url)
                    finally:
                        # Update the ownership of files in result folder.
                        # TODO(dshi): crbug.com/459344 Skip following action
                        # when test container can be unprivileged container.
                        if results:
                            lxc.run('chown -R %s %s' % (os.getuid(), results))
                            lxc.run('chgrp -R %s %s' % (os.getgid(), results))
                else:
                    job.run(install_before, install_after,
                            verify_job_repo_url=verify_job_repo_url,
                            only_collect_crashinfo=collect_crashinfo,
                            skip_crash_collection=skip_crash_collection,
                            job_labels=job_labels)
        finally:
            # Close every host of the job, whether or not the job succeeded.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Deliberately catch everything: any failure of the job is reported
        # through the exit code and the printed traceback.
        exit_code = 1
        traceback.print_exc()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
| |
| |
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. A run without
    any machine is recorded under the host name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname
    machines = (options.machines.replace(',', ' ').strip().split()
                if options.machines else [])
    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    hostname = machines[0] if machines else 'hostless'

    # Determine the status that will be reported. The pairs are checked in
    # the same order in which run_autoserv dispatches the tasks, so the result
    # is deterministic even if several task options are set.
    s = job_overhead.STATUS
    task_statuses = (
            ('repair', s.REPAIRING), ('verify', s.VERIFYING),
            ('provision', s.PROVISIONING), ('reset', s.RESETTING),
            ('cleanup', s.CLEANING), ('collect_crashinfo', s.GATHERING))
    status = s.RUNNING
    for task, task_status in task_statuses:
        # Read the option directly instead of re-parsing str(options).
        if getattr(options, task, None) is True:
            status = task_status
            break

    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(
            job_or_task_id, hostname, status, duration_secs,
            is_special_task=is_special_task)
| |
| |
def main():
    """Parse the command line, set up results and logging, and run autoserv.

    Stages the server-side package when --require-ssp is given, prepares the
    results directory and the logging, optionally starts a run time timer for
    white-listed tests, then either mocks the results (testing mode) or calls
    run_autoserv. The exit code is written to the pid file and the total
    duration is recorded before the process exits.
    """
    start_time = datetime.datetime.now()
    # White list of tests with run time measurement enabled.
    measure_run_time_tests_names = global_config.global_config.get_config_value(
            'AUTOSERV', 'measure_run_time_tests', type=str)
    if measure_run_time_tests_names:
        measure_run_time_tests = [t.strip() for t in
                                  measure_run_time_tests_names.split(',')]
    else:
        measure_run_time_tests = []
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    # If the job requires to run with server-side package, try to stage server-
    # side package first. If that fails with error that autotest server package
    # does not exist, fall back to run the job without using server-side package
    ssp_url = None
    if parser.options.require_ssp:
        ssp_url = _stage_ssp(parser)
        if not ssp_url:
            # The build does not have autotest server package. Fall back to not
            # to use server-side package, reset logging to log in results
            # folder.
            logging.warning(
                    'Autoserv is required to run with server-side packaging. '
                    'However, no server-side package can be found based on '
                    '`--image`, host attribute job_repo_url or host label of '
                    'cros-version. The test will be executed without '
                    'server-side packaging supported.')

    # Defined up front so the --use-existing-results check below also works
    # when logging is disabled and no results directory is inspected.
    resultdir_exists = False
    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # With server-side packaging this process logs to the 'wrapper'
    # sub-directory of the results directory.
    if parser.options.require_ssp and ssp_url:
        log_dir = os.path.join(results, 'wrapper') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results
    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    if results:
        logging.info("Results placed in %s", results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not lxc.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.BaseAutotest.set_install_in_tmpdir(
            parser.options.install_in_tmpdir)

    timer = None
    # Testing mode reads test_name below even when no control file or no
    # machines were given, so it must always be bound.
    test_name = None
    try:
        # Take the first argument as control file name, get the test name from
        # the control file. If the test name exists in the list of tests with
        # run time measurement enabled, start a timer to begin measurement.
        if (len(parser.args) > 0 and parser.args[0] != '' and
            parser.options.machines):
            try:
                test_name = control_data.parse_control(parser.args[0],
                                                       raise_warnings=True).name
            except control_data.ControlVariableException:
                logging.debug('Failed to retrieve test name from control file.')
                test_name = None
            if test_name in measure_run_time_tests:
                machines = parser.options.machines.replace(
                        ',', ' ').strip().split()
                try:
                    afe = frontend.AFE()
                    board = server_utils.get_board_from_afe(machines[0], afe)
                    timer = autotest_stats.Timer('autoserv_run_time.%s.%s' %
                                                 (board, test_name))
                    timer.start()
                except (urllib2.HTTPError, urllib2.URLError):
                    # Ignore error if RPC failed to get board
                    pass
    except control_data.ControlVariableException as e:
        logging.error(str(e))
    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = global_config.global_config.get_config_value(
            'AUTOSERV', 'testing_exceptions', type=list, default=[])
    test_mode = global_config.global_config.get_config_value(
            'AUTOSERV', 'testing_mode', type=bool, default=False)
    test_mode = (results_mocker and test_mode and not
                 any(ex in parser.options.label
                     for ex in testing_exceptions))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker(
                            test_name if test_name else 'unknown-test',
                            parser.options.results, machine
                            ).mock_results()
                return
            else:
                run_autoserv(pid_file_manager, results, parser, ssp_url)
        except SystemExit as e:
            # run_autoserv always finishes through sys.exit(); keep its code.
            exit_code = e.code
            if exit_code:
                logging.exception(e)
        except Exception as e:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception(e)
            exit_code = 1
    finally:
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        if timer:
            timer.stop()
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        duration_secs = (datetime.datetime.now() - start_time).total_seconds()
        record_autoserv(parser.options, duration_secs)
    sys.exit(exit_code)
| |
| |
# Run autoserv only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()