[autotest] Update autoserv to support server-side packaging.
This change makes autoserv honor the --require-ssp arg. When the
argument is specified, autoserv will:
1. Try to locate the server-side package based on --image, the host attribute
job_repo_url, and the host's cros-version label.
2. If no server-side package can be staged, the job will be run without SSP.
3. If the package can be staged, autoserv will start a container to run the
test.
a) The parent autoserv process's log will be stored in the [results]/wrapper
folder.
b) A container will be created from the base container, with proper setup.
c) The same autoserv command line (with the result path, control file, etc.
updated) will be executed inside the container; see the sketch below.
d) The parent autoserv process will wait for the above command to finish and
destroy the container afterwards. If the test job is aborted, the container
is also destroyed.
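
For reference, below is a minimal Python sketch (not part of the change; the
function name rewrite_args_for_container is hypothetical) of how _run_with_ssp
rewrites the parent command line before re-running it inside the container.
The flags and the paths_to_replace mapping mirror the diff.

  def rewrite_args_for_container(argv, paths_to_replace):
      # Rebuild the parent's argv so it can be re-run inside the container.
      args = list(argv)
      # Drop --require-ssp so the inner autoserv does not try to stage a
      # package and start another container.
      args.remove('--require-ssp')
      # Swap host-side paths (control file, results dir, parse_job dir) for
      # their in-container equivalents.
      args = [paths_to_replace.get(arg, arg) for arg in args]
      # The results dir is pre-created and mounted in the container.
      if '--use-existing-results' not in args:
          args.append('--use-existing-results')
      # Use a separate pidfile label so the inner run does not clash with
      # the parent's pidfile.
      if '--pidfile-label' not in args:
          args.extend(['--pidfile-label', 'container_autoserv'])
      return args
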
BUG=chromium:453624
TEST=local setup and run test command:
/usr/local/autotest/server/autoserv -p -r /usr/local/autotest/results/16-debug_\
user/$result_dir -m 172.27.215.232 -u udebug_user -l dummy_PassServer -s -P \
16-debug_user/$result_dir -n /usr/local/autotest/results/drone_tmp/attach.2 \
--verify_job_repo_url --require-ssp
create job from afe:
http://dshi.mtv.corp.google.com/afe/#tab_id=view_job&object_id=19
Change-Id: Ida36374dd500cd1fd0b67a86ab8c0198ade4cc36
Reviewed-on: https://chromium-review.googlesource.com/259834
Reviewed-by: Dan Shi <dshi@chromium.org>
Commit-Queue: Dan Shi <dshi@chromium.org>
Trybot-Ready: Dan Shi <dshi@chromium.org>
Tested-by: Dan Shi <dshi@chromium.org>
diff --git a/server/autoserv b/server/autoserv
index 63748b0..d9b967d 100755
--- a/server/autoserv
+++ b/server/autoserv
@@ -13,6 +13,7 @@
import os
import re
import signal
+import socket
import sys
import traceback
import time
@@ -34,7 +35,6 @@
# Number of seconds to wait before returning if testing mode is enabled
TESTING_MODE_SLEEP_SECS = 1
-
try:
import atfork
atfork.monkeypatch_os_fork_functions()
@@ -56,15 +56,142 @@
from autotest_lib.server import server_logging_config
from autotest_lib.server import server_job, utils, autoserv_parser, autotest
from autotest_lib.server import utils as server_utils
+from autotest_lib.site_utils import job_directories
from autotest_lib.site_utils import job_overhead
+from autotest_lib.site_utils import lxc
from autotest_lib.client.common_lib import pidfile, logging_manager
from autotest_lib.client.common_lib.cros.graphite import autotest_stats
+# Control segment to stage server-side package.
+STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE = server_job._control_segment_path(
+ 'stage_server_side_package')
+
def log_alarm(signum, frame):
logging.error("Received SIGALARM. Ignoring and continuing on.")
sys.exit(1)
-def run_autoserv(pid_file_manager, results, parser):
+
+def _get_machines(parser):
+ """Get a list of machine names from command line arg -m or a file.
+
+ @param parser: Parser for the command line arguments.
+
+ @return: A list of machine names from command line arg -m or the
+ machines file specified in the command line arg -M.
+ """
+ if parser.options.machines:
+ machines = parser.options.machines.replace(',', ' ').strip().split()
+ else:
+ machines = []
+ machines_file = parser.options.machines_file
+ if machines_file:
+ machines = []
+ for m in open(machines_file, 'r').readlines():
+ # remove comments, spaces
+ m = re.sub('#.*', '', m).strip()
+ if m:
+ machines.append(m)
+ logging.debug('Read list of machines from file: %s', machines_file)
+ logging.debug('Machines: %s', ','.join(machines))
+
+ if machines:
+ for machine in machines:
+ if not machine or re.search('\s', machine):
+ parser.parser.error("Invalid machine: %s" % str(machine))
+ machines = list(set(machines))
+ machines.sort()
+ return machines
+
+
+def _stage_ssp(parser):
+ """Stage server-side package.
+
+ This function calls a control segment to stage the server-side package based
+ on the job and the autoserv command line options. The implementation details
+ can differ for each host type. Currently, only CrosHost defines a
+ stage_server_side_package function.
+ The control segment returns None if no server-side package is available.
+ However, it may raise an exception if staging fails for a reason other than
+ the artifact (the server-side package) not being found.
+
+ @param parser: Command line arguments parser passed to the autoserv process.
+
+ @return: URL of the staged server-side package, or None if no server-side
+ package is found for the build.
+ """
+ namespace = {'machines': _get_machines(parser),
+ 'image': parser.options.image}
+ script_locals = {}
+ execfile(STAGE_SERVER_SIDE_PACKAGE_CONTROL_FILE, namespace, script_locals)
+ return script_locals['ssp_url']
+
+
+def _run_with_ssp(container_name, job_id, results, parser, ssp_url):
+ """Run the server job with server-side packaging.
+
+ @param container_name: Name of the container to run the test.
+ @param job_id: ID of the test job.
+ @param results: Folder to store results. This could be different from
+ parser.options.results:
+ parser.options.results can be set to None so results are
+ stored in a temp folder.
+ results can be None if the autoserv run requires no logging.
+ @param parser: Command line parser that contains the options.
+ @param ssp_url: URL of the staged server-side package.
+ """
+ bucket = lxc.ContainerBucket()
+ control = (parser.args[0] if len(parser.args) > 0 and parser.args[0] != ''
+ else None)
+ test_container = bucket.setup_test(container_name, job_id, ssp_url, results,
+ control=control)
+ args = sys.argv[:]
+ args.remove('--require-ssp')
+
+ # A dictionary of paths to replace in the command line. Each key is the path
+ # to be replaced; the value is the in-container path to replace it with.
+ paths_to_replace = {}
+ # Replace the control file path with the one in container.
+ if control:
+ container_control_filename = os.path.join(
+ lxc.CONTROL_TEMP_PATH, os.path.basename(control))
+ paths_to_replace[control] = container_control_filename
+ # Update result directory with the one in container.
+ if parser.options.results:
+ container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_id)
+ paths_to_replace[parser.options.results] = container_result_dir
+ # Update parse_job directory with the one in container. The assumption is
+ # that the result folder to be parsed is always the same as the results_dir.
+ if parser.options.parse_job:
+ container_parse_dir = os.path.join(lxc.RESULT_DIR_FMT % job_id)
+ paths_to_replace[parser.options.parse_job] = container_parse_dir
+
+ args = [paths_to_replace.get(arg, arg) for arg in args]
+
+ # Append --use-existing-results; the results directory is already created and
+ # mounted in the container. Applying this arg avoids an exception being raised.
+ if '--use-existing-results' not in args:
+ args.append('--use-existing-results')
+
+ # Make sure autoserv running in the container uses a different pid file.
+ if '--pidfile-label' not in args:
+ args.extend(['--pidfile-label', 'container_autoserv'])
+
+ cmd_line = ' '.join(args)
+ logging.info('Run command in container: %s', cmd_line)
+ try:
+ test_container.attach_run(cmd_line)
+ finally:
+ test_container.destroy()
+
+
+def run_autoserv(pid_file_manager, results, parser, ssp_url):
+ """Run server job with given options.
+
+ @param pid_file_manager: PidFileManager used to monitor the autoserv process.
+ @param results: Folder to store results.
+ @param parser: Parser for the command line arguments.
+ @param ssp_url: URL of the staged server-side package.
+ """
if parser.options.warn_no_ssp:
# Post a warning in the log and force to not use server-side packaging.
parser.options.require_ssp = False
@@ -81,6 +208,13 @@
# Create separate process group
os.setpgrp()
+ # Container name is predefined so the container can be destroyed in
+ # handle_sigterm.
+ job_or_task_id = job_directories.get_job_id_or_task_id(
+ parser.options.results)
+ container_name = (lxc.TEST_CONTAINER_NAME_FMT %
+ (job_or_task_id, time.time()))
+
# Implement SIGTERM handler
def handle_sigterm(signum, frame):
logging.debug('Received SIGTERM')
@@ -91,6 +225,21 @@
# This sleep allows the pending output to be logged before the kill
# signal is sent.
time.sleep(.1)
+ if parser.options.require_ssp and ssp_url:
+ logging.debug('Destroy container %s before aborting the autoserv '
+ 'process.', container_name)
+ try:
+ bucket = lxc.ContainerBucket()
+ container = bucket.get(container_name)
+ if container:
+ container.destroy()
+ else:
+ logging.debug('Container %s is not found.', container_name)
+ except:
+ # Handle any exception so the autoserv process can be aborted.
+ logging.error('Failed to destroy container %s. Error: %s',
+ container_name, sys.exc_info())
+
os.killpg(os.getpgrp(), signal.SIGKILL)
# Set signal handler
@@ -115,11 +264,6 @@
# but depending on how you launch your autotest scheduler it may not be set.
os.environ['USER'] = getpass.getuser()
- if parser.options.machines:
- machines = parser.options.machines.replace(',', ' ').strip().split()
- else:
- machines = []
- machines_file = parser.options.machines_file
label = parser.options.label
group_name = parser.options.group_name
user = parser.options.user
@@ -174,30 +318,13 @@
else:
control = None
- if machines_file:
- machines = []
- for m in open(machines_file, 'r').readlines():
- # remove comments, spaces
- m = re.sub('#.*', '', m).strip()
- if m:
- machines.append(m)
- print "Read list of machines from file: %s" % machines_file
- print ','.join(machines)
-
- if machines:
- for machine in machines:
- if not machine or re.search('\s', machine):
- parser.parser.error("Invalid machine: %s" % str(machine))
- machines = list(set(machines))
- machines.sort()
-
+ machines = _get_machines(parser)
if group_name and len(machines) < 2:
- parser.parser.error("-G %r may only be supplied with more than one machine."
- % group_name)
+ parser.parser.error('-G %r may only be supplied with more than one '
+ 'machine.' % group_name)
kwargs = {'group_name': group_name, 'tag': execution_tag,
- 'disable_sysinfo': parser.options.disable_sysinfo,
- 'require_ssp': parser.options.require_ssp}
+ 'disable_sysinfo': parser.options.disable_sysinfo}
if control_filename:
kwargs['control_filename'] = control_filename
job = server_job.server_job(control, parser.args[1:], results, label,
@@ -205,6 +332,7 @@
ssh_user, ssh_port, ssh_pass,
ssh_verbosity_flag, ssh_options,
test_retry, **kwargs)
+
job.logging.start_logging()
job.init_parser()
@@ -226,11 +354,25 @@
elif cleanup:
job.cleanup(job_labels)
else:
- job.run(install_before, install_after,
- verify_job_repo_url=verify_job_repo_url,
- only_collect_crashinfo=collect_crashinfo,
- skip_crash_collection=skip_crash_collection,
- job_labels=job_labels)
+ # Server-side packaging will only be used if it's required and
+ # the package is available.
+ if parser.options.require_ssp and ssp_url:
+ try:
+ _run_with_ssp(container_name, job_or_task_id, results,
+ parser, ssp_url)
+ finally:
+ # Update the ownership of files in the results folder.
+ # TODO(dshi): crbug.com/459344 Skip the following action
+ # when the test container can be an unprivileged container.
+ if results:
+ lxc.run('chown -R %s %s' % (os.getuid(), results))
+ lxc.run('chgrp -R %s %s' % (os.getgid(), results))
+ else:
+ job.run(install_before, install_after,
+ verify_job_repo_url=verify_job_repo_url,
+ only_collect_crashinfo=collect_crashinfo,
+ skip_crash_collection=skip_crash_collection,
+ job_labels=job_labels)
finally:
while job.hosts:
host = job.hosts.pop()
@@ -247,30 +389,6 @@
sys.exit(exit_code)
-def _get_job_id_or_task_id(result_dir, machine, is_special_task):
- """Extract job id or special task id from result_dir
-
- @param result_dir: path to the result dir.
- @param machine: hostname of the machine.
- @param is_special_task: True/False, whether it is a special task.
-
- @returns: integer representing the job id or task id.
- """
- if not result_dir:
- return
- result_dir = os.path.abspath(result_dir)
- if is_special_task:
- # special task result dir is like
- # /usr/local/autotest/results/hosts/chromeos1-rack5-host6/1343-cleanup
- pattern = '.*/hosts/%s/(\d+)-[^/]+' % machine
- else:
- # non-special task result dir is like
- # /usr/local/autotest/results/2032-chromeos-test/chromeos1-rack5-host6
- pattern ='.*/(\d+)-[^/]+/%s' % machine
- m = re.match(pattern, result_dir)
- return int(m.group(1)) if m else None
-
-
def record_autoserv(options, duration_secs):
"""Record autoserv end-to-end time in metadata db.
@@ -298,8 +416,7 @@
match = filter(lambda task: option_dict.get(task) == True, task_mapping)
status = task_mapping[match[0]] if match else s.RUNNING
is_special_task = status not in [s.RUNNING, s.GATHERING]
- job_or_task_id = _get_job_id_or_task_id(
- options.results, machines[0], is_special_task)
+ job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
job_overhead.record_state_duration(
job_or_task_id, machines[0], status, duration_secs,
is_special_task=is_special_task)
@@ -323,6 +440,23 @@
parser.parser.print_help()
sys.exit(1)
+ # If the job is required to run with a server-side package, try to stage the
+ # server-side package first. If that fails because the autotest server package
+ # does not exist, fall back to running the job without a server-side package.
+ ssp_url = None
+ if parser.options.require_ssp:
+ ssp_url = _stage_ssp(parser)
+ if not ssp_url:
+ # The build does not have an autotest server package. Fall back to
+ # not using a server-side package; logs will be written to the results
+ # folder.
+ logging.warn(
+ 'Autoserv is required to run with server-side packaging. '
+ 'However, no server-side package can be found based on '
+ '`--image`, host attribute job_repo_url or host label of '
+ 'cros-version. The test will be executed without '
+ 'server-side packaging support.')
+
if parser.options.no_logging:
results = None
else:
@@ -345,22 +479,32 @@
if not os.path.isdir(results):
os.makedirs(results)
+ if parser.options.require_ssp and ssp_url:
+ log_dir = os.path.join(results, 'wrapper') if results else None
+ if log_dir and not os.path.exists(log_dir):
+ os.makedirs(log_dir)
+ else:
+ log_dir = results
logging_manager.configure_logging(
- server_logging_config.ServerLoggingConfig(), results_dir=results,
+ server_logging_config.ServerLoggingConfig(),
+ results_dir=log_dir,
use_console=not parser.options.no_tee,
verbose=parser.options.verbose,
no_console_prefix=parser.options.no_console_prefix)
+
if results:
logging.info("Results placed in %s" % results)
# wait until now to perform this check, so it get properly logged
- if parser.options.use_existing_results and not resultdir_exists:
+ if (parser.options.use_existing_results and not resultdir_exists and
+ not lxc.is_in_container()):
logging.error("No existing results directory found: %s", results)
sys.exit(1)
+ logging.debug('autoserv is running in drone %s.', socket.gethostname())
logging.debug('autoserv command was: %s', ' '.join(sys.argv))
- if parser.options.write_pidfile:
+ if parser.options.write_pidfile and results:
pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
results)
pid_file_manager.open_file()
@@ -436,7 +580,7 @@
).mock_results()
return
else:
- run_autoserv(pid_file_manager, results, parser)
+ run_autoserv(pid_file_manager, results, parser, ssp_url)
except SystemExit as e:
exit_code = e.code
if exit_code: