[autotest] Update autoserv to support server-side packaging.
This change makes autoserv honor the --require-ssp arg. When the
argument is specified, autoserv will:
1. Try to locate the server-side package based on --image, the host attribute
job_repo_url, and the host's cros-version label.
2. If no server-side package can be staged, the job will be run without SSP.
3. If the package can be staged, autoserv will start a container to run the
test.
a) The parent autoserv process's log will be stored in the [results]/wrapper
folder.
b) A container will be created from the base container, with proper setup.
c) The same autoserv command line (with the result path, control file, etc.
updated) will be executed inside the container; see the sketch below.
d) The parent autoserv process will wait for the above command to finish and
destroy the container afterwards. If the test job is aborted, the container
is also destroyed.
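
For reference, below is a minimal Python sketch (not part of the change; the
function name rewrite_args_for_container is hypothetical) of how _run_with_ssp
rewrites the parent command line before re-running it inside the container.
The flags and the paths_to_replace mapping mirror the diff.

  def rewrite_args_for_container(argv, paths_to_replace):
      # Rebuild the parent's argv so it can be re-run inside the container.
      args = list(argv)
      # Drop --require-ssp so the inner autoserv does not try to stage a
      # package and start another container.
      args.remove('--require-ssp')
      # Swap host-side paths (control file, results dir, parse_job dir) for
      # their in-container equivalents.
      args = [paths_to_replace.get(arg, arg) for arg in args]
      # The results dir is pre-created and mounted in the container.
      if '--use-existing-results' not in args:
          args.append('--use-existing-results')
      # Use a separate pidfile label so the inner run does not clash with
      # the parent's pidfile.
      if '--pidfile-label' not in args:
          args.extend(['--pidfile-label', 'container_autoserv'])
      return args
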
BUG=chromium:453624
TEST=local setup and run test command:
/usr/local/autotest/server/autoserv -p -r /usr/local/autotest/results/16-debug_\
user/$result_dir -m 172.27.215.232 -u udebug_user -l dummy_PassServer -s -P \
16-debug_user/$result_dir -n /usr/local/autotest/results/drone_tmp/attach.2 \
--verify_job_repo_url --require-ssp
create job from afe:
http://dshi.mtv.corp.google.com/afe/#tab_id=view_job&object_id=19
Change-Id: Ida36374dd500cd1fd0b67a86ab8c0198ade4cc36
Reviewed-on: https://chromium-review.googlesource.com/259834
Reviewed-by: Dan Shi <dshi@chromium.org>
Commit-Queue: Dan Shi <dshi@chromium.org>
Trybot-Ready: Dan Shi <dshi@chromium.org>
Tested-by: Dan Shi <dshi@chromium.org>
diff --git a/server/autoserv b/server/autoserv
index 63748b0..d9b967d 100755
--- a/server/autoserv
+++ b/server/autoserv
@@ -13,6 +13,7 @@
import os
import re
import signal
+import socket
import sys
import traceback
import time
@@ -34,7 +35,6 @@
# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
-
try:
import atfork
atfork.monkeypatch_os_fork_functions()
@@ -56,15 +56,142 @@
from autotest_lib.server import server_logging_config
from autotest_lib.server import server_job, utils, autoserv_parser, autotest
from autotest_lib.server import utils as server_utils
+from autotest_lib.site_utils import job_directories
from autotest_lib.site_utils import job_overhead
+from autotest_lib.site_utils import lxc
from autotest_lib.client.common_lib import pidfile, logging_manager
from autotest_lib.client.common_lib.cros.graphite import autotest_stats
+# Control segment to stage server-side package.
+STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
+ 'stage_server_side_package')
+
def log_alarm(signum, frame):
logging.error("Received SIGALARM. Ignoring and continuing on.")
sys.exit(1)
-def run_autoserv(pid_file_manager, results, parser):
+
+def _get_machines(parser):
+ """Get a list of machine names from command line arg -m or a file.
+
+ @param parser: Parser for the command line arguments.
+
+ @return: A list of machine names from command line arg -m or the
+ machines file specified in the command line arg -M.
+ """
+ if parser.options.machines:
+ machines = parser.options.machines.replace(',', ' ').strip().split()
+ else:
+ machines = []
+ machines_file = parser.options.machines_file
+ if machines_file:
+ machines = []
+ for m in open(machines_file, 'r').readlines():
+ # remove comments, spaces
+ m = re.sub('#.*', '', m).strip()
+ if m:
+ machines.append(m)
+ logging.debug('Read list of machines from file: %s', machines_file)
+ logging.debug('Machines: %s', ','.join(machines))
+
+ if machines:
+ for machine in machines:
+ if not machine or re.search('\s', machine):
+ parser.parser.error("Invalid machine: %s" % str(machine))
+ machines = list(set(machines))
+ machines.sort()
+ return machines
+
+
+def _stage_ssp(parser):
+ """Stage server-side package.
+
+ This function calls a control segment to stage the server-side package based
+ on the job and the autoserv command line options. The implementation details
+ can differ for each host type. Currently, only CrosHost defines a
+ stage_server_side_package function.
+ The control segment returns None if no server-side package is available.
+ However, it may raise an exception if staging fails for a reason other than
+ the artifact (the server-side package) not being found.
+
+ @param parser: Command line arguments parser passed to the autoserv process.
+
+ @return: URL of the staged server-side package, or None if no server-side
+ package is found for the build.
+ """
+ namespace = {'machines': _get_machines(parser),
+ 'image': parser.options.image}
+ script_locals = {}
+ execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, namespace, script_locals)
+ return script_locals['ssp_url']
+
+
+def _run_with_ssp(container_name, job_id, results, parser, ssp_url):
+ """Run the server job with server-side packaging.
+
+ @param container_name: Name of the container to run the test.
+ @param job_id: ID of the test job.
+ @param results: Folder to store results. This could be different from
+ parser.options.results:
+ parser.options.results can be set to None so results are
+ stored in a temp folder.
+ results can be None if the autoserv run requires no logging.
+ @param parser: Command line parser that contains the options.
+ @param ssp_url: URL of the staged server-side package.
+ """
+ bucket = lxc.ContainerBucket()
+ control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
+ else None)
+ test_container = bucket.setup_test(container_name, job_id, ssp_url, results,
+ control=control)
+ args = sys.argv[:]
+ args.remove('--require-ssp')
+
+ # A dictionary of paths to replace in the command line. Each key is the path
+ # to be replaced; the value is the in-container path to replace it with.
+ paths_to_replace = {}
+ # Replace the control file path with the one in container.
+ if control:
+ container_control_filename = os.path.join(
+ lxc.CONTROL_TEMP_PATH, os.path.basename(control))
+ paths_to_replace[control] = container_control_filename
+ # Update result directory with the one in container.
+ if parser.options.results:
+ container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_id)
+ paths_to_replace[parser.options.results] = container_result_dir
+ # Update parse_job directory with the one in container. The assumption is
+ # that the result folder to be parsed is always the same as the results_dir.
+ if parser.options.parse_job:
+ container_parse_dir = os.path.join(lxc.RESULT_DIR_FMT % job_id)
+ paths_to_replace[parser.options.parse_job] = container_parse_dir
+
+ args = [paths_to_replace.get(arg, arg) for arg in args]
+
+ # Append --use-existing-results; the results directory is already created and
+ # mounted in the container. Applying this arg avoids an exception being raised.
+ if '--use-existing-results' not in args:
+ args.append('--use-existing-results')
+
+ # Make sure autoserv running in the container uses a different pid file.
+ if '--pidfile-label' not in args:
+ args.extend(['--pidfile-label', 'container_autoserv'])
+
+ cmd_line = ' '.join(args)
+ logging.info('Run command in container: %s', cmd_line)
+ try:
+ test_container.attach_run(cmd_line)
+ finally:
+ test_container.destroy()
+
+
+def run_autoserv(pid_file_manager, results, parser, ssp_url):
+ """Run server job with given options.
+
+ @param pid_file_manager: PidFileManager used to monitor the autoserv process.
+ @param results: Folder to store results.
+ @param parser: Parser for the command line arguments.
+ @param ssp_url: URL of the staged server-side package.
+ """
if parser.options.warn_no_ssp:
# Post a warning in the log and force to not use server-side packaging.
parser.options.require_ssp = False
@@ -81,6 +208,13 @@
# Create separate process group
os.setpgrp()
+ # Container name is predefined so the container can be destroyed in
+ # handle_sigterm.
+ job_or_task_id = job_directories.get_job_id_or_task_id(
+ parser.options.results)
+ container_name = (lxc.TEST_CONTAINER_NAME_FMT %
+ (job_or_task_id, time.time()))
+
# Implement SIGTERM handler
def handle_sigterm(signum, frame):
logging.debug('Received SIGTERM')
@@ -91,6 +225,21 @@
# This sleep allows the pending output to be logged before the kill
# signal is sent.
time.sleep(.1)
+ if parser.options.require_ssp and ssp_url:
+ logging.debug('Destroy container %s before aborting the autoserv '
+ 'process.', container_name)
+ try:
+ bucket = lxc.ContainerBucket()
+ container = bucket.get(container_name)
+ if container:
+ container.destroy()
+ else:
+ logging.debug('Container %s is not found.', container_name)
+ except:
+ # Handle any exception so the autoserv process can be aborted.
+ logging.error('Failed to destroy container %s. Error: %s',
+ container_name, sys.exc_info())
+
os.killpg(os.getpgrp(), signal.SIGKILL)
# Set signal handler
@@ -115,11 +264,6 @@
# but depending on how you launch your autotest scheduler it may not be set.
os.environ['USER'] = getpass.getuser()
- if parser.options.machines:
- machines = parser.options.machines.replace(',', ' ').strip().split()
- else:
- machines = []
- machines_file = parser.options.machines_file
label = parser.options.label
group_name = parser.options.group_name
user = parser.options.user
@@ -174,30 +318,13 @@
else:
control = None
- if machines_file:
- machines = []
- for m in open(machines_file, 'r').readlines():
- # remove comments, spaces
- m = re.sub('#.*', '', m).strip()
- if m:
- machines.append(m)
- print "Read list of machines from file: %s" % machines_file
- print ','.join(machines)
-
- if machines:
- for machine in machines:
- if not machine or re.search('\s', machine):
- parser.parser.error("Invalid machine: %s" % str(machine))
- machines = list(set(machines))
- machines.sort()
-
+ machines = _get_machines(parser)
if group_name and len(machines) < 2:
- parser.parser.error("-G %r may only be supplied with more than one machine."
- % group_name)
+ parser.parser.error('-G %r may only be supplied with more than one '
+ 'machine.' % group_name)
kwargs = {'group_name': group_name, 'tag': execution_tag,
- 'disable_sysinfo': parser.options.disable_sysinfo,
- 'require_ssp': parser.options.require_ssp}
+ 'disable_sysinfo': parser.options.disable_sysinfo}
if control_filename:
kwargs['control_filename'] = control_filename
job = server_job.server_job(control, parser.args[1:], results, label,
@@ -205,6 +332,7 @@
ssh_user, ssh_port, ssh_pass,
ssh_verbosity_flag, ssh_options,
test_retry, **kwargs)
+
job.logging.start_logging()
job.init_parser()
@@ -226,11 +354,25 @@
elif cleanup:
job.cleanup(job_labels)
else:
- job.run(install_before, install_after,
- verify_job_repo_url=verify_job_repo_url,
- only_collect_crashinfo=collect_crashinfo,
- skip_crash_collection=skip_crash_collection,
- job_labels=job_labels)
+ # Server-side packaging will only be used if it's required and
+ # the package is available.
+ if parser.options.require_ssp and ssp_url:
+ try:
+ _run_with_ssp(container_name, job_or_task_id, results,
+ parser, ssp_url)
+ finally:
+ # Update the ownership of files in the results folder.
+ # TODO(dshi): crbug.com/459344 Skip the following action
+ # when the test container can be an unprivileged container.
+ if results:
+ lxc.run('chown -R %s %s' % (os.getuid(), results))
+ lxc.run('chgrp -R %s %s' % (os.getgid(), results))
+ else:
+ job.run(install_before, install_after,
+ verify_job_repo_url=verify_job_repo_url,
+ only_collect_crashinfo=collect_crashinfo,
+ skip_crash_collection=skip_crash_collection,
+ job_labels=job_labels)
finally:
while job.hosts:
host = job.hosts.pop()
@@ -247,30 +389,6 @@
sys.exit(exit_code)
-def _get_job_id_or_task_id(result_dir, machine, is_special_task):
- """Extract job id or special task id from result_dir
-
- @param result_dir: path to the result dir.
- @param machine: hostname of the machine.
- @param is_special_task: True/False, whether it is a special task.
-
- @returns: integer representing the job id or task id.
- """
- if not result_dir:
- return
- result_dir = os.path.abspath(result_dir)
- if is_special_task:
- # special task result dir is like
- # /usr/local/autotest/results/hosts/chromeos1-rack5-host6/1343-cleanup
- pattern = '.*/hosts/%s/(\d+)-[^/]+' % machine
- else:
- # non-special task result dir is like
- # /usr/local/autotest/results/2032-chromeos-test/chromeos1-rack5-host6
- pattern ='.*/(\d+)-[^/]+/%s' % machine
- m = re.match(pattern, result_dir)
- return int(m.group(1)) if m else None
-
-
def record_autoserv(options, duration_secs):
"""Record autoserv end-to-end time in metadata db.
@@ -298,8 +416,7 @@
match = filter(lambda task: option_dict.get(task) == True, task_mapping)
status = task_mapping[match[0]] if match else s.RUNNING
is_special_task = status not in [s.RUNNING, s.GATHERING]
- job_or_task_id = _get_job_id_or_task_id(
- options.results, machines[0], is_special_task)
+ job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
job_overhead.record_state_duration(
job_or_task_id, machines[0], status, duration_secs,
is_special_task=is_special_task)
@@ -323,6 +440,23 @@
parser.parser.print_help()
sys.exit(1)
+ # If the job is required to run with a server-side package, try to stage the
+ # server-side package first. If that fails because the autotest server package
+ # does not exist, fall back to running the job without a server-side package.
+ ssp_url = None
+ if parser.options.require_ssp:
+ ssp_url = _stage_ssp(parser)
+ if not ssp_url:
+ # The build does not have an autotest server package. Fall back to
+ # not using a server-side package; logs will be written to the results
+ # folder.
+ logging.warn(
+ 'Autoserv is required to run with server-side packaging. '
+ 'However, no server-side package can be found based on '
+ '`--image`, host attribute job_repo_url or host label of '
+ 'cros-version. The test will be executed without '
+ 'server-side packaging support.')
+
if parser.options.no_logging:
results = None
else:
@@ -345,22 +479,32 @@
if not os.path.isdir(results):
os.makedirs(results)
+ if parser.options.require_ssp and ssp_url:
+ log_dir = os.path.join(results, 'wrapper') if results else None
+ if log_dir and not os.path.exists(log_dir):
+ os.makedirs(log_dir)
+ else:
+ log_dir = results
logging_manager.configure_logging(
- server_logging_config.ServerLoggingConfig(), results_dir=results,
+ server_logging_config.ServerLoggingConfig(),
+ results_dir=log_dir,
use_console=not parser.options.no_tee,
verbose=parser.options.verbose,
no_console_prefix=parser.options.no_console_prefix)
+
if results:
logging.info("Results placed in %s" % results)
# wait until now to perform this check, so it get properly logged
- if parser.options.use_existing_results and not resultdir_exists:
+ if (parser.options.use_existing_results and not resultdir_exists and
+ not lxc.is_in_container()):
logging.error("No existing results directory found: %s", results)
sys.exit(1)
+ logging.debug('autoserv is running in drone %s.', socket.gethostname())
logging.debug('autoserv command was: %s', ' '.join(sys.argv))
- if parser.options.write_pidfile:
+ if parser.options.write_pidfile and results:
pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
results)
pid_file_manager.open_file()
@@ -436,7 +580,7 @@
).mock_results()
return
else:
- run_autoserv(pid_file_manager, results, parser)
+ run_autoserv(pid_file_manager, results, parser, ssp_url)
except SystemExit as e:
exit_code = e.code
if exit_code: